2003-08-18 02:38:54 +00:00
/ *
The contents of this file are subject to the THDL Open Community License
Version 1 . 0 ( the " License " ) ; you may not use this file except in compliance
with the License . You may obtain a copy of the License on the THDL web site
( http : //www.thdl.org/).
Software distributed under the License is distributed on an " AS IS " basis ,
WITHOUT WARRANTY OF ANY KIND , either express or implied . See the
License for the specific terms governing rights and limitations under the
License .
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library ( THDL ) . Portions created by the THDL are Copyright 2003 THDL .
All Rights Reserved .
Contributor ( s ) : ______________________________________ .
* /
package org.thdl.tib.text.ttt ;
import java.io.* ;
import java.util.ArrayList ;
import java.util.Stack ;
2003-09-05 05:54:35 +00:00
import java.awt.Color ;
2003-08-18 02:38:54 +00:00
import org.thdl.util.ThdlDebug ;
import org.thdl.util.ThdlOptions ;
2003-08-31 16:06:35 +00:00
import org.thdl.tib.text.TibetanDocument ;
import org.thdl.tib.text.TibetanMachineWeb ;
import org.thdl.tib.text.DuffCode ;
2003-08-18 02:38:54 +00:00
/ * *
2003-08-31 16:06:35 +00:00
 * This class is able to convert an ACIP file into Unicode text and an
 * ACIP file into TMW (Tibetan Machine Web).  ACIP-&gt;Unicode should
 * yield the same results as ACIP-&gt;TMW followed by TMW-&gt;Unicode
 * (FIXME: test it!)
2003-08-18 02:38:54 +00:00
* @author David Chandler
* /
public class ACIPConverter {
2003-08-24 06:40:53 +00:00
// DLC NOW: (KA)'s info is lost when you convert to Unicode text instead of Unicode RTF. Give an ERROR.
2003-09-06 22:56:10 +00:00
// DLC NOW: IMPLEMENT (KA) font shrinking
// DLC NOW: BAo isn't converting.
// DLC NOW: tRAStA is not converted correctly to Unicode, and no
// warning is given when converting to TMW (Wait! isn't the "a
// stack occurs w/o a vowel" warning given?)
2003-08-18 02:38:54 +00:00
/ * * Command - line converter . Gives error messages on standard
* output about why we can ' t convert the document perfectly and
* exits with non - zero return code , or is silent otherwise and
* exits with code zero . < p > FIXME : not so efficient ; copies the
* whole file into memory first . * /
public static void main ( String [ ] args )
2003-08-24 06:40:53 +00:00
throws IOException
2003-08-18 02:38:54 +00:00
{
2003-09-04 04:04:21 +00:00
// We don't want to load the TM or TMW font files ourselves:
ThdlOptions . setUserPreference ( " thdl.rely.on.system.tmw.fonts " , true ) ;
ThdlOptions . setUserPreference ( " thdl.rely.on.system.tm.fonts " , true ) ;
ThdlOptions . setUserPreference ( " thdl.debug " , true ) ;
2003-08-18 02:38:54 +00:00
boolean verbose = true ;
2003-08-24 06:40:53 +00:00
if ( args . length ! = 1 ) {
System . out . println ( " Bad args! Need just the name of the ACIP text file. " ) ;
2003-08-18 02:38:54 +00:00
}
StringBuffer errors = new StringBuffer ( ) ;
int maxErrors = 250 ;
2003-08-24 06:40:53 +00:00
ArrayList al = ACIPTshegBarScanner . scanFile ( args [ 0 ] , errors , maxErrors - 1 ) ;
2003-08-18 02:38:54 +00:00
if ( null = = al ) {
2003-08-23 22:03:37 +00:00
System . err . println ( maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this " ) ;
2003-08-18 02:38:54 +00:00
System . err . println ( " Tibetan or English input? " ) ;
System . err . println ( " " ) ;
2003-08-23 22:03:37 +00:00
if ( false ) {
// Nobody wants to see this. FIXME: maybe somebody; have an option.
System . err . println ( " First " + maxErrors + " lexical errors scanning ACIP input file: " ) ;
System . err . println ( errors ) ;
}
System . err . println ( " Exiting with " + maxErrors + " or more lexical errors; please fix input file and try again. " ) ;
2003-08-18 02:38:54 +00:00
System . exit ( 1 ) ;
}
2003-08-24 06:40:53 +00:00
final boolean abortUponScanningError = false ;
2003-08-18 02:38:54 +00:00
if ( errors . length ( ) > 0 ) {
System . err . println ( " Errors scanning ACIP input file: " ) ;
System . err . println ( errors ) ;
2003-08-23 22:03:37 +00:00
if ( abortUponScanningError ) {
System . err . println ( " Exiting; please fix input file and try again. " ) ;
System . exit ( 1 ) ;
}
2003-08-18 02:38:54 +00:00
}
2003-09-06 22:56:10 +00:00
String warningLevel = " Most " ;
boolean colors = true ;
2003-08-24 06:40:53 +00:00
StringBuffer warnings = null ;
boolean putWarningsInOutput = false ;
if ( " None " ! = warningLevel ) {
warnings = new StringBuffer ( ) ;
putWarningsInOutput = true ;
}
2003-08-31 16:06:35 +00:00
convertToTMW ( al , System . out , errors , warnings ,
2003-09-06 22:56:10 +00:00
putWarningsInOutput , warningLevel , colors ) ;
2003-08-31 16:06:35 +00:00
int retCode = 0 ;
2003-08-18 02:38:54 +00:00
if ( errors . length ( ) > 0 ) {
System . err . println ( " Errors converting ACIP input file: " ) ;
System . err . println ( errors ) ;
2003-08-23 22:03:37 +00:00
System . err . println ( " The output contains these errors. " ) ;
2003-08-18 02:38:54 +00:00
System . err . println ( " Exiting; please fix input file and try again. " ) ;
2003-08-31 16:06:35 +00:00
retCode = 2 ;
2003-08-18 02:38:54 +00:00
}
2003-08-24 06:40:53 +00:00
if ( null ! = warnings & & warnings . length ( ) > 0 ) {
2003-08-23 22:03:37 +00:00
System . err . println ( " Warnings converting ACIP input file: " ) ;
System . err . println ( warnings ) ;
if ( putWarningsInOutput )
System . err . println ( " The output contains these warnings. " ) ;
2003-08-31 16:06:35 +00:00
retCode = 2 ;
2003-08-23 22:03:37 +00:00
}
2003-08-31 16:06:35 +00:00
if ( 0 = = retCode ) {
if ( verbose ) System . err . println ( " Converted " + args [ 0 ] + " perfectly. " ) ;
}
System . exit ( retCode ) ;
2003-08-18 02:38:54 +00:00
}
/ * * Writes TMW / Latin to out . If errors occur in converting a
2003-08-31 16:06:35 +00:00
* tsheg bar , then they are written into the output , and also
* appended to errors if errors is non - null . If warnings occur
* in converting a tsheg bar , then they are written into the
* output if writeWarningsToResult is true , and also appended to
* warnings if warnings is non - null . Returns true upon perfect
* success or if there were merely warnings , false if errors
2003-08-18 02:38:54 +00:00
* occurred .
2003-09-06 22:56:10 +00:00
* @param colors true if and only if you want Sanskrit in one
* color , errors / warnings in another , and tsheg - bars affected by
* prefix rules in another
2003-08-18 02:38:54 +00:00
* @throws IOException if we cannot write to out
* /
2003-08-31 16:06:35 +00:00
public static boolean convertToTMW ( ArrayList scan ,
OutputStream out ,
StringBuffer errors ,
StringBuffer warnings ,
boolean writeWarningsToResult ,
2003-09-06 22:56:10 +00:00
String warningLevel ,
boolean colors )
2003-08-31 16:06:35 +00:00
throws IOException
{
TibetanDocument tdoc = new TibetanDocument ( ) ;
boolean rv
= convertToTMW ( scan , tdoc , errors , warnings ,
2003-09-06 22:56:10 +00:00
writeWarningsToResult , warningLevel , colors ) ;
2003-08-31 16:06:35 +00:00
tdoc . writeRTFOutputStream ( out ) ;
return rv ;
}
private static boolean convertToTMW ( ArrayList scan ,
TibetanDocument tdoc ,
StringBuffer errors ,
StringBuffer warnings ,
boolean writeWarningsToResult ,
2003-09-06 22:56:10 +00:00
String warningLevel ,
boolean colors )
2003-08-18 02:38:54 +00:00
throws IOException
{
2003-08-31 16:06:35 +00:00
return convertTo ( false , scan , null , tdoc , errors , warnings ,
2003-09-06 22:56:10 +00:00
writeWarningsToResult , warningLevel , colors ) ;
2003-08-18 02:38:54 +00:00
}
2003-08-31 16:06:35 +00:00
2003-08-18 02:38:54 +00:00
/ * * Returns UTF - 8 encoded Unicode . A bit indirect , so use this
* for testing only if performance is a concern . If errors occur
* in scanning the ACIP or in converting a tsheg bar , then they
2003-08-23 22:03:37 +00:00
* are appended to errors if errors is non - null , as well as
* written to the result . If warnings occur in scanning the ACIP
* or in converting a tsheg bar , then they are appended to
* warnings if warnings is non - null , and they are written to the
* result if writeWarningsToResult is true . Returns the
2003-08-31 16:06:35 +00:00
* conversion upon perfect success or if there were merely
* warnings , null if errors occurred .
2003-08-18 02:38:54 +00:00
* /
public static String convertToUnicode ( String acip ,
2003-08-23 22:03:37 +00:00
StringBuffer errors ,
StringBuffer warnings ,
2003-08-24 06:40:53 +00:00
boolean writeWarningsToResult ,
String warningLevel ) {
2003-08-18 02:38:54 +00:00
ByteArrayOutputStream sw = new ByteArrayOutputStream ( ) ;
2003-08-24 06:40:53 +00:00
ArrayList al = ACIPTshegBarScanner . scan ( acip , errors , - 1 ) ;
2003-08-18 02:38:54 +00:00
try {
2003-09-07 16:19:50 +00:00
if ( null ! = al ) {
convertToUnicode ( al , sw , errors ,
warnings , writeWarningsToResult ,
warningLevel ) ;
2003-08-18 02:38:54 +00:00
return sw . toString ( " UTF-8 " ) ;
} else {
return null ;
}
2003-09-07 18:30:59 +00:00
} catch ( IOException e ) {
2003-08-18 02:38:54 +00:00
throw new Error ( e . toString ( ) ) ;
}
}
2003-09-06 22:56:10 +00:00
/ * * Writes Unicode text ( not RTF ) to out . < em > NOTE WELL : This
* inherently cannot describe the ACIP { ( KA ) KHA } properly , as
* that requires showing KA in a smaller font than KHA , which is
* not possible in plain text . < / em > If errors occur in converting
* a tsheg bar , then they are appended to errors if errors is
* non - null . Furthermore , errors are written to out . If
* writeWarningsToOut is true , then warnings also will be written
* to out .
2003-08-24 06:40:53 +00:00
* @return true upon perfect success , false if errors occurred .
2003-08-23 22:03:37 +00:00
* @param scan result of ACIPTshegBarScanner . scan ( . . )
* @param out stream to which to write converted text
* @param errors if non - null , all error messages are appended
2003-09-06 22:56:10 +00:00
* @param warnings if non - null , all warning messages appropriate
* to warningLevel are appended
2003-08-23 22:03:37 +00:00
* @param writeWarningsToOut if true , then all warning messages
* are written to out in the appropriate places
2003-08-18 02:38:54 +00:00
* @throws IOException if we cannot write to out
* /
public static boolean convertToUnicode ( ArrayList scan ,
OutputStream out ,
2003-08-23 22:03:37 +00:00
StringBuffer errors ,
StringBuffer warnings ,
2003-08-24 06:40:53 +00:00
boolean writeWarningsToOut ,
String warningLevel )
2003-08-18 02:38:54 +00:00
throws IOException
2003-08-31 16:06:35 +00:00
{
return convertTo ( true , scan , out , null , errors , warnings ,
2003-09-06 22:56:10 +00:00
writeWarningsToOut , warningLevel , false ) ;
2003-08-31 16:06:35 +00:00
}
2003-09-04 04:04:21 +00:00
private static boolean peekaheadFindsSpacesAndComma ( ArrayList /* of ACIPString */ scan ,
int pos ) {
int sz = scan . size ( ) ;
while ( pos < sz ) {
ACIPString s = ( ACIPString ) scan . get ( pos + + ) ;
if ( s . getType ( ) = = ACIPString . TIBETAN_PUNCTUATION & & s . getText ( ) . equals ( " " ) ) {
// keep going
} else {
if ( s . getType ( ) = = ACIPString . TIBETAN_PUNCTUATION & & s . getText ( ) . equals ( " , " ) ) {
return true ;
} else {
return false ;
}
}
}
return false ;
}
2003-08-31 16:06:35 +00:00
private static boolean convertTo ( boolean toUnicode , // else to TMW
ArrayList scan ,
OutputStream out , // for toUnicode mode
TibetanDocument tdoc , // for !toUnicode mode
StringBuffer errors ,
StringBuffer warnings ,
boolean writeWarningsToOut ,
2003-09-06 22:56:10 +00:00
String warningLevel ,
boolean colors )
2003-08-31 16:06:35 +00:00
throws IOException
2003-08-18 02:38:54 +00:00
{
2003-09-07 18:30:59 +00:00
int smallFontSize = - 1 ;
int regularFontSize = - 1 ;
if ( null ! = tdoc ) {
String latinFont
= ThdlOptions . getStringOption ( " thdl.acip.to.x.latin.font " ,
2003-09-07 22:08:35 +00:00
" Times New Roman " ) ;
2003-09-07 18:30:59 +00:00
int latinFontSize
= ThdlOptions . getIntegerOption ( " thdl.acip.to.x.latin.font.size " ,
2003-09-07 22:08:35 +00:00
18 ) ;
2003-09-07 18:30:59 +00:00
tdoc . setRomanAttributeSet ( latinFont , latinFontSize ) ;
regularFontSize = tdoc . getTibetanFontSize ( ) ;
smallFontSize = ( int ) ( 0 . 75 * regularFontSize ) ;
if ( smallFontSize > = regularFontSize )
smallFontSize = regularFontSize - 1 ;
}
2003-09-06 22:56:10 +00:00
if ( colors )
tdoc . enableColors ( ) ;
else
tdoc . disableColors ( ) ;
2003-08-18 02:38:54 +00:00
int sz = scan . size ( ) ;
boolean hasErrors = false ;
2003-08-31 16:06:35 +00:00
BufferedWriter writer = null ;
if ( toUnicode )
writer
= new BufferedWriter ( new OutputStreamWriter ( out , " UTF-8 " ) ) ;
2003-09-04 04:04:21 +00:00
boolean lastGuyWasNonPunct = false ;
TStackList lastGuy = null ;
2003-09-05 05:54:35 +00:00
Color lastColor = Color . BLACK ;
Color color = Color . BLACK ;
2003-08-18 02:38:54 +00:00
for ( int i = 0 ; i < sz ; i + + ) {
ACIPString s = ( ACIPString ) scan . get ( i ) ;
int stype = s . getType ( ) ;
if ( stype = = ACIPString . ERROR ) {
2003-09-04 04:04:21 +00:00
lastGuyWasNonPunct = false ;
lastGuy = null ;
2003-08-18 02:38:54 +00:00
hasErrors = true ;
2003-08-31 16:06:35 +00:00
String text = " [#ERROR CONVERTING ACIP DOCUMENT: Lexical error: " + s . getText ( ) + " ] " ;
if ( null ! = writer ) writer . write ( text ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( text , Color . RED ) ;
2003-09-07 16:19:50 +00:00
} else if ( stype = = ACIPString . TSHEG_BAR_ADORNMENT ) {
if ( lastGuyWasNonPunct ) {
String err = " [#ERROR CONVERTING ACIP DOCUMENT: This converter cannot yet convert " + s . getText ( ) + " because the converter's author is unclear what the result should be.] " ;
if ( null ! = writer ) {
String uni = ACIPRules . getUnicodeFor ( s . getText ( ) , false ) ;
if ( null = = uni ) {
hasErrors = true ;
uni = err ;
}
if ( null ! = writer ) writer . write ( uni ) ;
}
if ( null ! = tdoc ) {
String wylie
= ACIPRules . getWylieForACIPOther ( s . getText ( ) ) ;
if ( null = = wylie ) {
hasErrors = true ;
tdoc . appendRoman ( err , Color . RED ) ;
} else {
tdoc . appendDuffCodes ( new DuffCode [ ] { TibetanMachineWeb . getGlyph ( wylie ) } ,
Color . BLACK ) ;
}
}
} else {
hasErrors = true ;
}
lastGuyWasNonPunct = true ; // this stuff is not really punctuation
lastGuy = null ;
2003-08-24 06:40:53 +00:00
} else if ( stype = = ACIPString . WARNING ) {
2003-09-04 04:04:21 +00:00
lastGuyWasNonPunct = false ;
lastGuy = null ;
2003-08-24 06:40:53 +00:00
if ( writeWarningsToOut ) {
2003-08-31 16:06:35 +00:00
String text = " [#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: " + s . getText ( ) + " ] " ;
if ( null ! = writer ) writer . write ( text ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( text , Color . RED ) ;
2003-08-24 06:40:53 +00:00
}
2003-08-31 16:06:35 +00:00
2003-08-24 06:40:53 +00:00
if ( null ! = warnings ) {
warnings . append ( " Warning: Lexical warning: " ) ;
warnings . append ( s . getText ( ) ) ;
warnings . append ( '\n' ) ;
}
2003-08-18 02:38:54 +00:00
} else {
if ( s . isLatin ( stype ) ) {
2003-09-04 04:04:21 +00:00
lastGuyWasNonPunct = false ;
lastGuy = null ;
2003-08-31 16:06:35 +00:00
String text
= ( ( ( stype = = ACIPString . FOLIO_MARKER ) ? " { " : " " )
+ s . getText ( )
+ ( ( stype = = ACIPString . FOLIO_MARKER ) ? " } " : " " ) ) ;
if ( null ! = writer ) writer . write ( text ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( text , Color . BLACK ) ;
2003-08-18 02:38:54 +00:00
} else {
String unicode = null ;
2003-08-31 16:06:35 +00:00
DuffCode [ ] duff = null ;
2003-08-18 02:38:54 +00:00
if ( stype = = ACIPString . TIBETAN_NON_PUNCTUATION ) {
2003-09-04 04:04:21 +00:00
lastGuyWasNonPunct = true ;
2003-08-18 02:38:54 +00:00
TPairList pl = TPairListFactory . breakACIPIntoChunks ( s . getText ( ) ) ;
String acipError ;
if ( ( acipError = pl . getACIPError ( ) ) ! = null ) {
hasErrors = true ;
String errorMessage = " [#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR ( \" SYLLABLE \" ) " + s . getText ( ) + " HAS THESE ERRORS: " + acipError + " ] " ;
2003-08-31 16:06:35 +00:00
if ( null ! = writer ) writer . write ( errorMessage ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( errorMessage , Color . RED ) ;
2003-08-18 02:38:54 +00:00
if ( null ! = errors )
errors . append ( errorMessage + " \ n " ) ;
} else {
TParseTree pt = pl . getParseTree ( ) ;
if ( null = = pt ) {
hasErrors = true ;
String errorMessage = " [#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR ( \" SYLLABLE \" ) " + s . getText ( ) + " IS ESSENTIALLY NOTHING.] " ;
2003-08-31 16:06:35 +00:00
if ( null ! = writer ) writer . write ( errorMessage ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( errorMessage , Color . RED ) ;
2003-08-18 02:38:54 +00:00
if ( null ! = errors )
errors . append ( errorMessage + " \ n " ) ;
} else {
TStackList sl = pt . getBestParse ( ) ;
if ( null = = sl ) {
hasErrors = true ;
String errorMessage = " [#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR ( \" SYLLABLE \" ) " + s . getText ( ) + " HAS NO LEGAL PARSES.] " ;
2003-08-31 16:06:35 +00:00
if ( null ! = writer ) writer . write ( errorMessage ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( errorMessage , Color . RED ) ;
2003-08-18 02:38:54 +00:00
if ( null ! = errors )
errors . append ( errorMessage + " \ n " ) ;
} else {
2003-09-04 04:04:21 +00:00
lastGuy = sl ;
2003-08-23 22:03:37 +00:00
String warning
2003-08-24 06:40:53 +00:00
= pt . getWarning ( warningLevel ,
2003-08-23 22:03:37 +00:00
pl ,
s . getText ( ) ) ;
if ( null ! = warning ) {
if ( writeWarningsToOut ) {
2003-08-31 16:06:35 +00:00
String text
= ( " [#WARNING CONVERTING ACIP DOCUMENT: "
+ warning + " ] " ) ;
if ( null ! = writer ) writer . write ( text ) ;
2003-09-05 05:54:35 +00:00
if ( null ! = tdoc ) tdoc . appendRoman ( text , Color . RED ) ;
2003-08-23 22:03:37 +00:00
}
if ( null ! = warnings ) {
warnings . append ( warning ) ;
warnings . append ( '\n' ) ;
}
}
2003-08-31 16:06:35 +00:00
if ( null ! = writer ) {
unicode = sl . getUnicode ( ) ;
if ( null = = unicode ) throw new Error ( " FIXME: make this an assertion 4 " ) ;
}
if ( null ! = tdoc ) {
duff = sl . getDuff ( ) ;
2003-09-07 22:08:35 +00:00
if ( colors & & sl . isLegalTshegBar ( true ) . isLegal & & ! sl . isLegalTshegBar ( false ) . isLegal ) {
2003-09-05 06:05:46 +00:00
color = Color . YELLOW ;
2003-09-07 22:08:35 +00:00
} else if ( colors & & sl . isLegalTshegBar ( false ) . isLegal ) {
2003-09-05 05:54:35 +00:00
color = Color . BLACK ;
} else {
2003-09-05 06:05:46 +00:00
// Sanskrit
2003-09-06 22:56:10 +00:00
// DLC FIXME: a funny vowel, the presence of a sanskrit-only stack, and a funny mark like ACIP ':' should cause green too.
2003-09-05 06:05:46 +00:00
color = Color . GREEN ;
2003-09-05 05:54:35 +00:00
}
2003-08-31 16:06:35 +00:00
if ( 0 = = duff . length ) {
throw new Error ( " No DuffCodes for stack list " + sl ) ; // FIXME: make this an assertion
}
}
2003-08-18 02:38:54 +00:00
}
}
}
} else {
2003-09-05 05:54:35 +00:00
color = Color . BLACK ;
2003-08-31 16:06:35 +00:00
if ( stype = = ACIPString . START_SLASH ) {
if ( null ! = writer ) unicode = " \ u0F3C " ;
if ( null ! = tdoc ) duff = new DuffCode [ ] { TibetanMachineWeb . getGlyph ( " ( " ) } ;
} else if ( stype = = ACIPString . END_SLASH ) {
if ( null ! = writer ) unicode = " \ u0F3D " ;
if ( null ! = tdoc ) duff = new DuffCode [ ] { TibetanMachineWeb . getGlyph ( " ) " ) } ;
2003-09-05 05:08:47 +00:00
} else if ( stype = = ACIPString . TIBETAN_PUNCTUATION ) {
2003-09-04 04:04:21 +00:00
// For ACIP, tshegs are used as both
// tshegs and whitespace. We treat a
// space as a tsheg if and only if it
// occurs after TIBETAN_NON_PUNCTUATION.
// But "SHIG ,MDO" is an example of a
// special case, needed because a tsheg is
// not used after a GA in Tibetan
// typesetting.
boolean done = false ;
// DLC what about after numbers? marks?
2003-09-05 05:08:47 +00:00
TPairList lpl = null ;
2003-09-04 04:04:21 +00:00
if ( s . getText ( ) . equals ( " " ) ) {
if ( ! lastGuyWasNonPunct
| | ( null ! = lastGuy
& & ( lpl = lastGuy . get ( lastGuy . size ( ) - 1 ) ) . size ( ) = = 1
2003-09-12 05:06:37 +00:00
// "GU ," and "KU ," each have
// tshegs, but "GI ," and "KI
// ," each have a Tibetan
// space.
& & ( ( lpl . get ( 0 ) . getLeft ( ) . equals ( " G " )
| | lpl . get ( 0 ) . getLeft ( ) . equals ( " K " ) )
& & ( lpl . get ( 0 ) . getRight ( ) . indexOf ( 'U' ) < 0 ) )
& &
// it's (G . anything)
// followed by some number of
// spaces (at least one, this
// one) and then a comma:
2003-09-04 04:04:21 +00:00
peekaheadFindsSpacesAndComma ( scan , i + 1 ) ) ) {
if ( null ! = writer ) {
unicode = " " ;
done = true ;
}
if ( null ! = tdoc ) {
2003-09-05 05:54:35 +00:00
tdoc . appendRoman ( " " , Color . BLACK ) ;
2003-09-04 04:04:21 +00:00
continue ;
}
2003-09-05 05:54:35 +00:00
// DLC AM I DOING THIS? By normal Tibetan & Dzongkha spelling, writing, and input rules
// Tibetan script stacks should be entered and written: 1 headline
// consonant (0F40->0F6A), any subjoined consonant(s) (0F90->
// 0F9C), achung (0F71), shabkyu (0F74), any above headline
// vowel(s) (0F72 0F7A 0F7B 0F7C 0F7D and 0F80) ; any ngaro (0F7E,
// 0F82 and 0F83)
2003-08-31 16:06:35 +00:00
}
2003-09-05 05:08:47 +00:00
} else if ( s . getText ( ) . equals ( " , " )
& & lastGuyWasNonPunct
& & null ! = lastGuy
& & ( lpl = lastGuy . get ( lastGuy . size ( ) - 1 ) ) . size ( ) = = 1
& & lpl . get ( 0 ) . getLeft ( ) . equals ( " NG " ) ) {
DuffCode tshegDuff = TibetanMachineWeb . getGlyph ( " " ) ;
if ( null = = tshegDuff ) throw new Error ( " tsheg duff " ) ;
2003-09-05 05:54:35 +00:00
tdoc . appendDuffCodes ( new DuffCode [ ] { tshegDuff } , lastColor ) ;
2003-09-04 04:04:21 +00:00
}
2003-09-05 05:08:47 +00:00
2003-09-04 04:04:21 +00:00
if ( ! done ) {
if ( null ! = writer ) unicode = ACIPRules . getUnicodeFor ( s . getText ( ) , false ) ;
if ( null ! = tdoc ) {
2003-09-04 04:34:18 +00:00
if ( s . getText ( ) . equals ( " \ r " )
| | s . getText ( ) . equals ( " \ t " )
| | s . getText ( ) . equals ( " \ n " )
| | s . getText ( ) . equals ( " \ r \ n " ) ) {
2003-09-05 05:54:35 +00:00
tdoc . appendRoman ( s . getText ( ) , Color . BLACK ) ;
2003-09-04 04:04:21 +00:00
continue ;
2003-09-04 04:34:18 +00:00
} else {
2003-09-04 04:04:21 +00:00
String wy = ACIPRules . getWylieForACIPOther ( s . getText ( ) ) ;
if ( null = = wy ) throw new Error ( " No wylie for ACIP " + s . getText ( ) ) ;
duff = new DuffCode [ ] { TibetanMachineWeb . getGlyph ( wy ) } ;
}
2003-08-31 16:06:35 +00:00
}
}
2003-09-07 18:30:59 +00:00
} else if ( stype = = ACIPString . START_PAREN ) {
if ( null ! = tdoc ) {
tdoc . setTibetanFontSize ( smallFontSize ) ;
}
continue ;
} else if ( stype = = ACIPString . END_PAREN ) {
if ( null ! = tdoc ) {
tdoc . setTibetanFontSize ( regularFontSize ) ;
}
continue ;
2003-09-05 05:08:47 +00:00
} else {
throw new Error ( " forgot a case " ) ;
2003-08-31 16:06:35 +00:00
}
if ( null ! = writer & & null = = unicode )
throw new Error ( " FIXME: make this an assertion 1 " ) ;
if ( null ! = tdoc & & ( null = = duff | | 0 = = duff . length ) )
throw new Error ( " FIXME: make this an assertion 2 " ) ;
2003-09-04 04:04:21 +00:00
lastGuyWasNonPunct = false ;
lastGuy = null ;
2003-08-18 02:38:54 +00:00
}
2003-08-31 16:06:35 +00:00
if ( null ! = writer & & null ! = unicode ) writer . write ( unicode ) ;
if ( null ! = tdoc ) {
if ( null ! = duff & & 0 ! = duff . length ) {
2003-09-05 05:54:35 +00:00
tdoc . appendDuffCodes ( duff , color ) ;
2003-08-31 16:06:35 +00:00
} else {
// this happens when you have an
// [#ERROR]-producing tsheg bar.
// System.err.println("Bad tsheg bar with ACIP {" + s.getText() + "}");
}
2003-08-18 02:38:54 +00:00
}
}
}
2003-09-05 05:54:35 +00:00
lastColor = color ;
2003-08-18 02:38:54 +00:00
}
2003-08-31 16:06:35 +00:00
if ( null ! = writer ) {
writer . close ( ) ;
}
2003-08-18 02:38:54 +00:00
return ! hasErrors ;
}
}