2003-05-18 14:14:47 +00:00
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.

The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
All Rights Reserved.

Contributor(s): ______________________________________.
*/
package org.thdl.tib.input ;
2005-07-17 03:32:57 +00:00
import java.io.BufferedReader ;
2005-07-11 03:10:32 +00:00
import java.io.BufferedWriter ;
import java.io.FileInputStream ;
import java.io.IOException ;
import java.io.InputStream ;
2005-07-17 03:32:57 +00:00
import java.io.InputStreamReader ;
2005-07-11 03:10:32 +00:00
import java.io.OutputStreamWriter ;
import java.io.PrintStream ;
2007-05-01 02:38:45 +00:00
import java.io.PrintWriter ;
2005-07-11 03:10:32 +00:00
import java.util.ArrayList ;
2003-07-01 03:43:33 +00:00
import javax.swing.text.SimpleAttributeSet ;
import javax.swing.text.StyleConstants ;
2005-07-11 03:10:32 +00:00
import javax.swing.text.rtf.RTFEditorKit ;
2003-05-18 14:14:47 +00:00
2005-07-11 03:10:32 +00:00
import org.thdl.tib.text.TibetanDocument ;
2005-07-17 03:32:57 +00:00
import org.thdl.tib.text.reverter.Converter ;
2005-02-22 04:36:54 +00:00
import org.thdl.tib.text.ttt.ACIPTraits ;
2005-06-20 09:30:35 +00:00
import org.thdl.tib.text.ttt.EWTSTraits ;
2005-07-11 03:10:32 +00:00
import org.thdl.tib.text.ttt.TConverter ;
2005-06-20 09:30:35 +00:00
import org.thdl.tib.text.ttt.TTraits ;
2005-07-11 03:10:32 +00:00
import org.thdl.util.RTFFixerInputStream ;
import org.thdl.util.ThdlDebug ;
import org.thdl.util.ThdlLazyException ;
import org.thdl.util.ThdlOptions ;
import org.thdl.util.ThdlVersion ;
2007-05-01 02:38:45 +00:00
import org.thdl.tib.scanner.* ;
2003-08-24 06:40:53 +00:00
2004-04-24 17:49:16 +00:00
/ * * TibetanConverter is a command - line utility for converting to and
* from Tibetan Machine Web ( TMW ) . It converts TMW to Wylie , ACIP ,
* Unicode , or to Tibetan Machine ( TM ) . It also converts to TMW from
* TM or ACIP . Some conversions use RTF ( rich text format ) ; some use
* text . Invoke it with no parameters for usage information . Full
2004-06-06 21:39:45 +00:00
* documentation is available at the website listed below .
*
* @see < a href = " http://thdltools.sourceforge.net/TMW_RTF_TO_THDL_WYLIE.html " > End - user documentation < / a >
*
2003-05-18 14:14:47 +00:00
* @author David Chandler * /
2003-06-25 01:04:24 +00:00
public class TibetanConverter implements FontConverterConstants {
private static final boolean debug = false ;
2003-06-24 03:02:29 +00:00
/** Default constructor; does nothing */
TibetanConverter ( ) { }
2003-05-18 17:17:52 +00:00
static final String rtfErrorMessage
= " The Rich Text Format (RTF) file selected contains constructs that \ nJskad cannot handle. If you got the RTF file from saving a Word \ ndocument as RTF, try saving that same document as RTF in \ nWord 2000 instead of Word XP or in Word 97 instead of \ nWord 2000. Older versions of Word produce RTF that Jskad \ ncan more easily deal with. OpenOffice and StarOffice may also \ nproduce better-behaved RTF. " ;
2003-05-18 14:14:47 +00:00
/ * *
* Runs the converter . * /
2004-04-24 17:49:16 +00:00
public static void main ( String [ ] args ) {
2003-09-04 04:34:18 +00:00
// No need for the TM or TMW fonts.
System . setProperty ( " thdl.rely.on.system.tmw.fonts " , " true " ) ;
System . setProperty ( " thdl.rely.on.system.tm.fonts " , " true " ) ;
2003-07-01 02:50:09 +00:00
// Runs on Linux/Unix boxes without X11 servers:
System . setProperty ( " java.awt.headless " , " true " ) ;
2003-05-18 17:17:52 +00:00
System . exit ( realMain ( args , System . out ) ) ;
}
/ * * Runs the converter without exiting the program .
* @return the exit code . * /
public static int realMain ( String [ ] args , PrintStream out ) {
2003-05-18 14:14:47 +00:00
try {
2005-02-13 00:34:47 +00:00
boolean convertTmwToTmwMode = false ;
2003-06-15 18:38:42 +00:00
boolean convertToUnicodeMode = false ;
2003-05-31 23:21:29 +00:00
boolean convertToTMMode = false ;
2003-08-24 06:40:53 +00:00
boolean convertACIPToUniMode = false ;
2003-08-31 16:06:35 +00:00
boolean convertACIPToTMWMode = false ;
2005-06-20 09:30:35 +00:00
boolean convertWylieToUniMode = false ;
boolean convertWylieToTMWMode = false ;
2003-06-08 22:43:27 +00:00
boolean convertToTMWMode = false ;
2003-09-05 02:05:34 +00:00
boolean convertToWylieRTFMode = false ;
boolean convertToWylieTextMode = false ;
boolean convertToACIPRTFMode = false ;
boolean convertToACIPTextMode = false ;
2005-07-17 03:32:57 +00:00
boolean convertUniToWylieTextMode = false ;
2003-05-18 14:14:47 +00:00
boolean findSomeNonTMWMode = false ;
boolean findAllNonTMWMode = false ;
2003-06-22 00:14:18 +00:00
boolean findSomeNonTMMode = false ;
boolean findAllNonTMMode = false ;
2003-09-06 22:56:10 +00:00
boolean colors = false ;
2004-04-24 17:49:16 +00:00
boolean shortMessages = false ;
String warningLevel = null ;
2003-09-04 05:16:47 +00:00
2003-05-18 14:14:47 +00:00
// Process arguments:
2004-04-24 17:49:16 +00:00
final int numArgs = 8 ;
2003-09-06 22:56:10 +00:00
if ( ( args . length ! = 1 & & args . length ! = numArgs )
2003-05-18 14:14:47 +00:00
| | ( args . length = = 1
2003-05-31 23:21:29 +00:00
& & ! ( args [ 0 ] . equals ( " -v " )
| | args [ 0 ] . equals ( " --version " ) ) )
2003-09-06 22:56:10 +00:00
| | ( args . length = = numArgs
2004-04-24 17:49:16 +00:00
& & ( ! ( args [ numArgs - 8 ] . equals ( " --colors " ) )
| | ! ( ( colors = args [ numArgs - 7 ] . equals ( " yes " ) )
| | args [ numArgs - 7 ] . equals ( " no " ) )
| | ! ( args [ numArgs - 6 ] . equals ( " --warning-level " ) )
| | ! ( ( warningLevel = args [ numArgs - 5 ] ) . equals ( " Most " )
| | warningLevel . equals ( " Some " )
| | warningLevel . equals ( " All " )
| | warningLevel . equals ( " None " ) )
| | ! ( args [ numArgs - 4 ] . equals ( " --acip-to-tibetan-warning-and-error-messages " ) )
| | ! ( ( shortMessages = args [ numArgs - 3 ] . equals ( " short " ) )
| | args [ numArgs - 3 ] . equals ( " long " ) )
2003-09-06 22:56:10 +00:00
| | ! ( ( findAllNonTMWMode
= args [ numArgs - 2 ] . equals ( " --find-all-non-tmw " ) )
2005-02-13 00:34:47 +00:00
| | ( convertTmwToTmwMode
= args [ numArgs - 2 ] . equals ( " --tmw-to-tmw-for-testing " ) )
2003-09-06 22:56:10 +00:00
| | ( convertToTMMode
= args [ numArgs - 2 ] . equals ( " --to-tibetan-machine " ) )
2005-07-17 03:32:57 +00:00
| | ( convertUniToWylieTextMode
= args [ numArgs - 2 ] . equals ( " --utf8-text-to-ewts-text " ) )
2003-09-06 22:56:10 +00:00
| | ( convertToTMWMode
= args [ numArgs - 2 ] . equals ( " --to-tibetan-machine-web " ) )
| | ( convertACIPToUniMode
= args [ numArgs - 2 ] . equals ( " --acip-to-unicode " ) )
| | ( convertACIPToTMWMode
= args [ numArgs - 2 ] . equals ( " --acip-to-tmw " ) )
2005-06-20 09:30:35 +00:00
| | ( convertWylieToUniMode
= args [ numArgs - 2 ] . equals ( " --wylie-to-unicode " ) )
| | ( convertWylieToTMWMode
= args [ numArgs - 2 ] . equals ( " --wylie-to-tmw " ) )
2003-09-06 22:56:10 +00:00
| | ( convertToUnicodeMode
= args [ numArgs - 2 ] . equals ( " --to-unicode " ) )
| | ( convertToWylieRTFMode
= args [ numArgs - 2 ] . equals ( " --to-wylie " ) )
| | ( convertToWylieTextMode
= args [ numArgs - 2 ] . equals ( " --to-wylie-text " ) )
| | ( convertToACIPRTFMode
= args [ numArgs - 2 ] . equals ( " --to-acip " ) )
| | ( convertToACIPTextMode
= args [ numArgs - 2 ] . equals ( " --to-acip-text " ) )
| | ( findSomeNonTMWMode
= args [ numArgs - 2 ] . equals ( " --find-some-non-tmw " ) )
| | ( findSomeNonTMMode
= args [ numArgs - 2 ] . equals ( " --find-some-non-tm " ) )
| | ( findAllNonTMMode
= args [ numArgs - 2 ] . equals ( " --find-all-non-tm " ) )
) ) ) ) {
2004-04-24 17:49:16 +00:00
if ( args . length ! = numArgs ) {
out . println ( " " ) ;
out . println ( " Wrong number of arguments; needs " + numArgs + " arguments. " ) ;
out . println ( " " ) ;
}
2003-09-07 22:08:00 +00:00
out . println ( " TibetanConverter --colors yes|no " ) ;
out . println ( " --warning-level None|Some|Most|All " ) ;
2005-07-06 07:46:21 +00:00
out . println ( " --acip-to-tibetan-warning-and-error-messages short|long " ) ; // TODO(DLC)[EWTS->Tibetan]: misnomer, ewts and acip both are affected
2003-09-07 22:08:00 +00:00
out . println ( " --find-all-non-tmw | --find-some-non-tmw " ) ;
2005-02-13 00:34:47 +00:00
out . println ( " | --tmw-to-tmw-for-testing " ) ;
2003-09-07 22:08:00 +00:00
out . println ( " | --to-tibetan-machine | --to-tibetan-machine-web " ) ;
out . println ( " | --to-unicode | --to-wylie | --to-acip " ) ;
2003-12-07 19:10:36 +00:00
out . println ( " | --to-wylie-text | --to-acip-text " ) ;
2005-06-20 09:30:35 +00:00
out . println ( " | --wylie-to-unicode | --wylie-to-tmw " ) ;
2003-12-07 19:10:36 +00:00
out . println ( " | --acip-to-unicode | --acip-to-tmw RTF_file|TXT_file " ) ;
2003-06-15 19:19:23 +00:00
out . println ( " | TibetanConverter [--version | -v | --help | -h] " ) ;
2003-05-18 17:17:52 +00:00
out . println ( " " ) ;
out . println ( " Distributed under the terms of the THDL Open Community License Version 1.0. " ) ;
out . println ( " " ) ;
out . println ( " Usage: " ) ;
out . println ( " -v | --version for version info " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-05-18 17:17:52 +00:00
out . println ( " -h | --help for this message " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2005-07-06 07:46:21 +00:00
out . println ( " --wylie-to-unicode to convert an EWTS text file to a Unicode " ) ;
out . println ( " " ) ;
out . println ( " --wylie-to-tmw to convert an EWTS text file to TibetanMachineWeb " ) ;
out . println ( " " ) ;
2003-08-24 06:40:53 +00:00
out . println ( " --to-tibetan-machine to convert TibetanMachineWeb to TibetanMachine " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-08-24 06:40:53 +00:00
out . println ( " --to-unicode to convert TibetanMachineWeb to Unicode " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-08-24 06:40:53 +00:00
out . println ( " --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-09-04 05:16:47 +00:00
out . println ( " --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie in RTF " ) ;
out . println ( " " ) ;
out . println ( " --to-wylie-text to convert TibetanMachineWeb to THDL Extended Wylie in text " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-09-04 05:16:47 +00:00
out . println ( " --to-acip to convert TibetanMachineWeb to ACIP in RTF " ) ;
out . println ( " " ) ;
out . println ( " --to-acip-text to convert TibetanMachineWeb to ACIP in text " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-08-24 06:40:53 +00:00
out . println ( " --acip-to-unicode to convert ACIP text file to Unicode text file " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
out . println ( " --acip-to-tmw to convert ACIP text file to Tibetan Machine Web RTF File. " ) ;
out . println ( " " ) ;
2003-05-18 17:17:52 +00:00
out . println ( " --find-all-non-tmw to locate all characters in the input document that are " ) ;
2003-06-22 00:14:18 +00:00
out . println ( " not in Tibetan Machine Web fonts, exit zero if and only if none found " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-05-18 17:17:52 +00:00
out . println ( " --find-some-non-tmw to locate all distinct characters in the input document " ) ;
2003-06-22 00:14:18 +00:00
out . println ( " not in Tibetan Machine Web fonts, exit zero if and only if none found " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-06-22 00:14:18 +00:00
out . println ( " --find-all-non-tm to locate all characters in the input document that are " ) ;
out . println ( " not in Tibetan Machine fonts, exit zero if and only if none found " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2003-06-22 00:14:18 +00:00
out . println ( " --find-some-non-tm to locate all distinct characters in the input document " ) ;
out . println ( " not in Tibetan Machine fonts, exit zero if and only if none found " ) ;
out . println ( " " ) ;
2003-09-04 04:34:18 +00:00
out . println ( " " ) ;
2004-04-24 17:49:16 +00:00
out . println ( " In --to... and --acip-to... modes, needs one argument, the name of the " ) ;
out . println ( " TibetanMachineWeb RTF file (for --to-wylie, --to-wylie-text, --to-acip-text, " ) ;
out . println ( " --to-acip, --to-unicode, and --to-tibetan-machine) or the name of " ) ;
out . println ( " the TibetanMachine RTF file (for --to-tibetan-machine-web) or the name of the " ) ;
out . println ( " ACIP text file (for --acip-to-unicode or --acip-to-tmw). Writes the " ) ;
out . println ( " result to standard output (after dealing with the curly brace problem if " ) ;
out . println ( " the input is TibetanMachineWeb). Exit code is zero on success, 42 if some " ) ;
out . println ( " glyphs couldn't be converted (in which case the output is just those glyphs), " ) ;
out . println ( " 44 if a TMW->Wylie conversion ran into some glyphs that couldn't be " ) ;
out . println ( " converted, in which case ugly error messages like " ) ;
out . println ( " \" <<[[JSKAD_TMW_TO_WYLIE_ERROR_NO_SUCH_WYLIE: Cannot convert DuffCode... \" " ) ;
out . println ( " are in your document waiting for your personal attention, " ) ;
out . println ( " 43 if not even one glyph found was eligible for this conversion, which means " ) ;
out . println ( " that you probably selected the wrong conversion or the wrong document, or " ) ;
2005-07-17 03:32:57 +00:00
out . println ( " nonzero on some other error. " ) ;
// TODO(dchandler): describe 47 48 50 etc.
2004-04-24 17:49:16 +00:00
out . println ( " " ) ;
out . println ( " You may find it helpful to use `--find-some-non-tmw' mode (or " ) ;
out . println ( " `--find-some-non-tm' mode for Tibetan Machine input) before doing a " ) ;
out . println ( " conversion so that you have confidence in the conversion's correctness. " ) ;
2003-05-18 17:17:52 +00:00
out . println ( " " ) ;
2004-04-24 17:49:16 +00:00
out . println ( " When using short error and warning messages for ACIP->Tibetan conversions, " ) ;
out . println ( " i.e. when '--acip-to-tibetan-warning-and-error-messages short' is given, " ) ;
out . println ( " the output will contain error and warning numbers. The following are the " ) ;
out . println ( " long forms of each warning and error: " ) ;
out . println ( " " ) ;
org . thdl . tib . text . ttt . ErrorsAndWarnings . printErrorAndWarningDescriptions ( out ) ;
2003-05-18 17:17:52 +00:00
return 77 ;
2003-05-18 14:14:47 +00:00
}
if ( args [ 0 ] . equals ( " --version " ) | | args [ 0 ] . equals ( " -v " ) ) {
2004-04-24 17:49:16 +00:00
out . println ( " TibetanConverter version 0.84 " ) ;
2003-05-31 23:21:29 +00:00
out . println ( " Compiled at "
+ ThdlVersion . getTimeOfCompilation ( ) ) ;
2003-05-18 17:17:52 +00:00
return 77 ;
2003-05-18 14:14:47 +00:00
}
2003-06-22 21:05:16 +00:00
String inputRtfPath = args [ args . length - 1 ] ;
2003-05-18 14:14:47 +00:00
2003-06-24 03:02:29 +00:00
InputStream in ;
if ( inputRtfPath . equals ( " - " ) )
in = System . in ;
else
in = new FileInputStream ( inputRtfPath ) ;
String conversionTag = null ;
2003-05-18 14:14:47 +00:00
if ( findAllNonTMWMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = FIND_ALL_NON_TMW ;
2003-05-18 14:14:47 +00:00
} else if ( findSomeNonTMWMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = FIND_SOME_NON_TMW ;
2003-06-22 00:14:18 +00:00
} else if ( findSomeNonTMMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = FIND_SOME_NON_TM ;
2003-06-22 00:14:18 +00:00
} else if ( findAllNonTMMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = FIND_ALL_NON_TM ;
2003-05-31 23:21:29 +00:00
} else { // conversion {to Wylie or TM} mode
2003-09-05 02:05:34 +00:00
if ( convertToWylieRTFMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = TMW_TO_WYLIE ;
2003-09-05 02:05:34 +00:00
} else if ( convertToWylieTextMode ) {
conversionTag = TMW_TO_WYLIE_TEXT ;
2005-07-17 03:32:57 +00:00
} else if ( convertUniToWylieTextMode ) {
conversionTag = UNI_TO_WYLIE_TEXT ;
2003-09-05 02:05:34 +00:00
} else if ( convertToACIPRTFMode ) {
2003-09-02 06:39:33 +00:00
conversionTag = TMW_TO_ACIP ;
2003-09-05 02:05:34 +00:00
} else if ( convertToACIPTextMode ) {
conversionTag = TMW_TO_ACIP_TEXT ;
2003-06-15 18:38:42 +00:00
} else if ( convertToUnicodeMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = TMW_TO_UNI ;
2005-02-13 00:34:47 +00:00
} else if ( convertTmwToTmwMode ) {
2005-06-20 09:30:35 +00:00
conversionTag = TMW_TO_SAME_TMW ;
2003-06-08 22:43:27 +00:00
} else if ( convertToTMWMode ) {
2003-06-25 01:04:24 +00:00
conversionTag = TM_TO_TMW ;
2003-08-24 06:40:53 +00:00
} else if ( convertACIPToUniMode ) {
2003-09-05 02:05:34 +00:00
conversionTag = ACIP_TO_UNI_TEXT ;
2003-08-31 16:06:35 +00:00
} else if ( convertACIPToTMWMode ) {
conversionTag = ACIP_TO_TMW ;
2005-06-20 09:30:35 +00:00
} else if ( convertWylieToUniMode ) {
conversionTag = WYLIE_TO_UNI_TEXT ;
} else if ( convertWylieToTMWMode ) {
conversionTag = WYLIE_TO_TMW ;
2003-05-31 23:21:29 +00:00
} else {
ThdlDebug . verify ( convertToTMMode ) ;
2003-06-26 22:48:51 +00:00
conversionTag = TMW_TO_TM ;
2003-05-31 23:21:29 +00:00
}
2003-05-18 14:14:47 +00:00
}
2003-09-05 02:05:34 +00:00
return reallyConvert ( in , out , conversionTag ,
2004-04-24 17:49:16 +00:00
warningLevel . intern ( ) , shortMessages ,
colors ) ;
2003-05-18 14:14:47 +00:00
} catch ( ThdlLazyException e ) {
2003-06-15 19:19:23 +00:00
out . println ( " TibetanConverter has a BUG: " ) ;
2003-05-18 17:17:52 +00:00
e . getRealException ( ) . printStackTrace ( out ) ;
2003-12-14 07:41:15 +00:00
System . err . println ( " TibetanConverter has a BUG: " ) ;
e . getRealException ( ) . printStackTrace ( System . err ) ;
2003-05-18 17:17:52 +00:00
return 7 ;
2003-05-18 14:14:47 +00:00
} catch ( IOException e ) {
2003-05-18 17:17:52 +00:00
e . printStackTrace ( out ) ;
2003-12-14 07:41:15 +00:00
e . printStackTrace ( System . err ) ;
2003-05-18 17:17:52 +00:00
return 4 ;
2003-10-26 06:02:48 +00:00
} catch ( OutOfMemoryError e ) {
e . printStackTrace ( out ) ;
2003-12-14 07:41:15 +00:00
e . printStackTrace ( System . err ) ;
2003-10-26 06:02:48 +00:00
throw e ;
2003-05-18 14:14:47 +00:00
}
2004-04-24 17:49:16 +00:00
}
2003-06-24 03:02:29 +00:00
/ * * Reads from in , closes in , converts ( or finds some / all
non - TM / TMW ) , writes the result to out , does not close out .
The action taken depends on ct , which must be one of a set
2004-04-24 17:49:16 +00:00
number of strings - - see the code . Uses short error and
warning messages if shortMessages is true ; gives no warnings
or many warnings depending on warningLevel . Returns an
appropriate return code so that TibetanConverter ' s usage
message is honored . * /
2003-08-24 06:40:53 +00:00
static int reallyConvert ( InputStream in , PrintStream out , String ct ,
2004-04-24 17:49:16 +00:00
String warningLevel , boolean shortMessages ,
boolean colors ) {
2007-05-01 02:38:45 +00:00
if ( UNI_TO_WYLIE_TEXT = = ct | | WYLIE_TO_ACIP_TEXT = = ct | | ACIP_TO_WYLIE_TEXT = = ct ) {
2005-07-17 03:32:57 +00:00
try {
2007-05-01 02:38:45 +00:00
/ * String uniText ;
2005-07-17 03:32:57 +00:00
{
// TODO(dchandler): use, here and elsewhere in the
// codebase,
// org.apache.commons.io.IOUtils.toString(InputStream,
// encoding)
StringBuffer s = new StringBuffer ( ) ;
char ch [ ] = new char [ 8192 ] ;
BufferedReader bin
= new BufferedReader ( new InputStreamReader ( in ,
" UTF-8 " ) ) ;
int amt ;
while ( - 1 ! = ( amt = bin . read ( ch ) ) ) {
s . append ( ch , 0 , amt ) ;
}
bin . close ( ) ;
uniText = s . toString ( ) ;
}
StringBuffer errors = new StringBuffer ( ) ;
2005-08-01 05:54:20 +00:00
// TODO(dchandler): DLC: use human-friendly EWTS, not
// computer-friendly!
String ewtsText = Converter . convertToEwtsForComputers ( uniText ,
errors ) ;
2005-07-17 03:32:57 +00:00
// TODO(dchandler): is 51 the right choice?
2007-05-01 02:38:45 +00:00
return ( errors . length ( ) > 0 ) ? 51 : 0 ; * /
BasicTibetanTranscriptionConverter bc = null ;
if ( UNI_TO_WYLIE_TEXT = = ct ) bc = new BasicTibetanTranscriptionConverter ( new BufferedReader ( new InputStreamReader ( in , " UTF16 " ) ) , new PrintWriter ( out ) ) ;
else bc = new BasicTibetanTranscriptionConverter ( new BufferedReader ( new InputStreamReader ( in ) ) , new PrintWriter ( out ) ) ;
bc . run ( ct ) ;
return 0 ;
2005-07-17 03:32:57 +00:00
} catch ( IOException e ) {
// TODO(dchandler): print it? where to?
return 48 ;
}
} else if ( ACIP_TO_UNI_TEXT = = ct | | ACIP_TO_TMW = = ct
| | WYLIE_TO_UNI_TEXT = = ct | | WYLIE_TO_TMW = = ct ) {
2003-08-24 06:40:53 +00:00
try {
2004-04-24 17:49:16 +00:00
ArrayList al
2005-06-20 09:30:35 +00:00
= ( ( ACIP_TO_UNI_TEXT = = ct | | ACIP_TO_TMW = = ct )
? ( TTraits ) ACIPTraits . instance ( )
: ( TTraits ) EWTSTraits . instance ( ) ) . scanner ( ) . scanStream ( in , null ,
ThdlOptions . getIntegerOption ( ( ACIP_TO_UNI_TEXT = = ct | | ACIP_TO_TMW = = ct )
? " thdl.most.errors.a.tibetan.acip.document.can.have "
: " thdl.most.errors.a.tibetan.ewts.document.can.have " ,
1000 - 1 ) ,
shortMessages ,
warningLevel ) ;
2003-08-24 06:40:53 +00:00
if ( null = = al )
return 47 ;
boolean embeddedWarnings = ( warningLevel ! = " None " ) ;
2003-12-14 07:41:15 +00:00
boolean hasWarnings [ ] = new boolean [ ] { false } ;
2005-06-20 09:30:35 +00:00
if ( ACIP_TO_UNI_TEXT = = ct
| | WYLIE_TO_UNI_TEXT = = ct ) {
if ( ! TConverter . convertToUnicodeText ( ( WYLIE_TO_UNI_TEXT = = ct )
? ( TTraits ) EWTSTraits . instance ( )
: ( TTraits ) ACIPTraits . instance ( ) ,
2005-02-22 04:36:54 +00:00
al , out , null ,
2005-02-21 01:35:23 +00:00
null , hasWarnings ,
embeddedWarnings ,
warningLevel ,
shortMessages ) )
2003-08-31 16:06:35 +00:00
return 46 ;
} else {
2005-06-20 09:30:35 +00:00
if ( ! TConverter . convertToTMW ( ( WYLIE_TO_TMW = = ct )
? ( TTraits ) EWTSTraits . instance ( )
: ( TTraits ) ACIPTraits . instance ( ) ,
2005-02-22 04:36:54 +00:00
al , out , null , null ,
2005-02-21 01:35:23 +00:00
hasWarnings ,
embeddedWarnings ,
warningLevel , shortMessages ,
colors ) )
2003-08-31 16:06:35 +00:00
return 46 ;
}
2003-12-14 07:41:15 +00:00
if ( embeddedWarnings & & hasWarnings [ 0 ] )
2003-08-24 06:40:53 +00:00
return 45 ;
else
return 0 ;
} catch ( IOException e ) {
2005-07-17 03:32:57 +00:00
// TODO(dchandler): print it? where to?
2003-08-24 06:40:53 +00:00
return 48 ;
}
} else {
TibetanDocument tdoc = new TibetanDocument ( ) ;
{
SimpleAttributeSet ras = new SimpleAttributeSet ( ) ;
StyleConstants . setFontFamily ( ras ,
ThdlOptions . getStringOption ( " thdl.default.roman.font.face " ,
" Serif " ) ) ;
StyleConstants . setFontSize ( ras ,
ThdlOptions . getIntegerOption ( " thdl.default.roman.font.size " ,
14 ) ) ;
tdoc . setRomanAttributeSet ( ras ) ;
}
try {
// Read in the rtf file.
if ( debug ) System . err . println ( " Start: reading in old RTF file " ) ;
if ( ! ThdlOptions . getBooleanOption ( " thdl.do.not.fix.rtf.hex.escapes " ) )
in = new RTFFixerInputStream ( in ) ;
( new RTFEditorKit ( ) ) . read ( in , tdoc , 0 ) ;
if ( debug ) System . err . println ( " End : reading in old RTF file " ) ;
} catch ( Exception e ) {
out . println ( " TibetanConverter: \ n "
+ rtfErrorMessage ) ;
return 3 ;
}
try {
in . close ( ) ;
} catch ( IOException e ) {
// silently ignore; we don't care about the input so much...
ThdlDebug . noteIffyCode ( ) ;
2003-06-24 03:02:29 +00:00
}
2003-06-29 21:31:48 +00:00
2003-08-24 06:40:53 +00:00
if ( FIND_ALL_NON_TMW = = ct ) {
// 0, -1 is the entire document.
int exitCode
= tdoc . findAllNonTMWCharacters ( 0 , - 1 , out ) ;
if ( out . checkError ( ) )
exitCode = 41 ;
return exitCode ;
} else if ( FIND_SOME_NON_TMW = = ct ) {
// 0, -1 is the entire document.
int exitCode
= tdoc . findSomeNonTMWCharacters ( 0 , - 1 , out ) ;
if ( out . checkError ( ) )
exitCode = 41 ;
return exitCode ;
} else if ( FIND_SOME_NON_TM = = ct ) {
// 0, -1 is the entire document.
int exitCode
= tdoc . findSomeNonTMCharacters ( 0 , - 1 , out ) ;
if ( out . checkError ( ) )
exitCode = 41 ;
return exitCode ;
} else if ( FIND_ALL_NON_TM = = ct ) {
// 0, -1 is the entire document.
int exitCode
= tdoc . findAllNonTMCharacters ( 0 , - 1 , out ) ;
if ( out . checkError ( ) )
exitCode = 41 ;
return exitCode ;
} else { // conversion {to Wylie or TM} mode
// Fix curly braces in the entire document if the input is TMW:
if ( TM_TO_TMW ! = ct ) {
// DLC make me optional
if ( debug ) System . err . println ( " Start: solving curly brace problem " ) ;
tdoc . replaceTahomaCurlyBracesAndBackslashes ( 0 , - 1 ) ;
if ( debug ) System . err . println ( " End : solving curly brace problem " ) ;
2003-06-24 03:02:29 +00:00
}
2003-08-24 06:40:53 +00:00
int exitCode = 0 ;
ThdlDebug . verify ( ( ( TMW_TO_TM = = ct ) ? 1 : 0 )
2005-06-20 09:30:35 +00:00
+ ( ( TMW_TO_SAME_TMW = = ct ) ? 1 : 0 )
2003-08-24 06:40:53 +00:00
+ ( ( TMW_TO_UNI = = ct ) ? 1 : 0 )
+ ( ( TM_TO_TMW = = ct ) ? 1 : 0 )
2003-09-02 06:39:33 +00:00
+ ( ( TMW_TO_ACIP = = ct ) ? 1 : 0 )
2003-09-05 02:05:34 +00:00
+ ( ( TMW_TO_ACIP_TEXT = = ct ) ? 1 : 0 )
2003-08-24 06:40:53 +00:00
+ ( ( TMW_TO_WYLIE = = ct ) ? 1 : 0 )
2003-09-05 02:05:34 +00:00
+ ( ( TMW_TO_WYLIE_TEXT = = ct ) ? 1 : 0 )
2003-08-24 06:40:53 +00:00
= = 1 ) ;
long numAttemptedReplacements [ ] = new long [ ] { 0 } ;
2005-06-20 09:30:35 +00:00
if ( TMW_TO_SAME_TMW = = ct ) {
2005-02-13 00:34:47 +00:00
// Identity conversion for testing
if ( tdoc . identityTmwToTmwConversion ( 0 ,
tdoc . getLength ( ) ,
numAttemptedReplacements ) ) {
exitCode = 50 ;
}
} else if ( TMW_TO_WYLIE = = ct | | TMW_TO_WYLIE_TEXT = = ct ) {
2003-08-24 06:40:53 +00:00
// Convert to THDL Wylie:
if ( ! tdoc . toWylie ( 0 ,
tdoc . getLength ( ) ,
2003-07-01 01:21:57 +00:00
numAttemptedReplacements ) ) {
2003-08-24 06:40:53 +00:00
exitCode = 44 ;
}
2003-09-12 05:06:37 +00:00
} else if ( TMW_TO_ACIP = = ct | | TMW_TO_ACIP_TEXT = = ct ) {
2003-09-02 06:39:33 +00:00
// Convert to ACIP:
if ( ! tdoc . toACIP ( 0 ,
tdoc . getLength ( ) ,
numAttemptedReplacements ) ) {
exitCode = 49 ;
}
2003-08-24 06:40:53 +00:00
} else if ( TMW_TO_UNI = = ct ) {
StringBuffer errors = new StringBuffer ( ) ;
// Convert to Unicode:
if ( tdoc . convertToUnicode ( 0 ,
tdoc . getLength ( ) ,
errors ,
ThdlOptions . getStringOption ( " thdl.tmw.to.unicode.font " ) . intern ( ) ,
numAttemptedReplacements ) ) {
System . err . println ( errors ) ;
exitCode = 42 ;
}
} else if ( TM_TO_TMW = = ct ) {
StringBuffer errors = new StringBuffer ( ) ;
// Convert to TibetanMachineWeb:
if ( tdoc . convertToTMW ( 0 , tdoc . getLength ( ) , errors ,
numAttemptedReplacements ) ) {
System . err . println ( errors ) ;
exitCode = 42 ;
}
} else {
ThdlDebug . verify ( TMW_TO_TM = = ct ) ;
StringBuffer errors = new StringBuffer ( ) ;
// Convert to TibetanMachine:
if ( tdoc . convertToTM ( 0 , tdoc . getLength ( ) , errors ,
numAttemptedReplacements ) ) {
System . err . println ( errors ) ;
exitCode = 42 ;
}
2003-06-24 03:02:29 +00:00
}
2003-08-24 06:40:53 +00:00
// Write to standard output the result:
2003-09-05 02:05:34 +00:00
if ( TMW_TO_WYLIE_TEXT = = ct | | TMW_TO_ACIP_TEXT = = ct ) {
2003-09-04 05:16:47 +00:00
try {
2003-09-12 05:06:37 +00:00
BufferedWriter bw
2005-02-27 10:27:37 +00:00
= new BufferedWriter ( new OutputStreamWriter ( out ,
" UTF-8 " ) ) ;
2003-09-12 05:06:37 +00:00
tdoc . writeTextOutput ( bw ) ;
bw . flush ( ) ;
2003-09-04 05:16:47 +00:00
} catch ( IOException e ) {
exitCode = 40 ;
}
} else {
try {
tdoc . writeRTFOutputStream ( out ) ;
} catch ( IOException e ) {
exitCode = 40 ;
}
2003-06-24 03:02:29 +00:00
}
2003-08-24 06:40:53 +00:00
if ( out . checkError ( ) )
exitCode = 41 ;
if ( numAttemptedReplacements [ 0 ] < 1 )
exitCode = 43 ;
2003-06-24 03:02:29 +00:00
2003-08-24 06:40:53 +00:00
return exitCode ;
2003-06-24 03:02:29 +00:00
}
}
}
2003-05-18 14:14:47 +00:00
}