2005-02-21 01:16:10 +00:00
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.

The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
All Rights Reserved.

Contributor(s): ______________________________________.
*/
2005-06-20 06:18:00 +00:00
// TODO(DLC)[EWTS->Tibetan]: TibetanMachineWeb has duplication of much of this!
2005-02-21 01:16:10 +00:00
package org.thdl.tib.text.ttt ;
2005-02-22 04:36:54 +00:00
import java.util.ArrayList ;
2005-06-20 06:18:00 +00:00
2005-07-06 02:19:38 +00:00
import org.thdl.tib.text.tshegbar.UnicodeUtils ;
2005-02-22 04:36:54 +00:00
import org.thdl.tib.text.DuffCode ;
2005-06-20 09:30:35 +00:00
import org.thdl.tib.text.THDLWylieConstants ;
import org.thdl.tib.text.TibTextUtils ;
2005-06-20 06:18:00 +00:00
import org.thdl.tib.text.TibetanMachineWeb ;
import org.thdl.util.ThdlDebug ;
2005-02-22 04:36:54 +00:00
2005-02-21 01:16:10 +00:00
/** A singleton class that should contain (but due to laziness and
 *  ignorance probably does not contain) all the traits that make EWTS
 *  transliteration different from other (say, ACIP) transliteration
 *  schemes. */
2005-02-22 04:36:54 +00:00
public final class EWTSTraits implements TTraits {
2005-02-21 01:16:10 +00:00
/** The sole instance of this class; null until instance() first
 *  creates it. */
private static EWTSTraits singleton = null ;
/** Does nothing.  Private, so code outside this class cannot
 *  instantiate it directly; use instance() instead. */
private EWTSTraits ( ) { }
/** */
2005-02-22 04:36:54 +00:00
public static synchronized EWTSTraits instance ( ) {
2005-02-21 01:16:10 +00:00
if ( null = = singleton ) {
singleton = new EWTSTraits ( ) ;
}
return singleton ;
}
/**
 * Returns ".", the EWTS disambiguator, as a String.
 *
 * @return the one-character string "."
 */
public String disambiguator() {
    return ".";
}
/**
 * Returns '.', the EWTS disambiguator, as a char.
 *
 * @return the character '.'
 */
public char disambiguatorChar() {
    final char dot = '.';
    return dot;
}
2005-06-20 06:18:00 +00:00
// TODO(DLC)[EWTS->Tibetan]: isClearlyIllegal and hasSimpleError are different why?
2005-02-21 01:16:10 +00:00
public boolean hasSimpleError ( TPair p ) {
2005-06-20 06:18:00 +00:00
if ( pairHasBadWowel ( p ) ) return true ;
return ( ( " a " . equals ( p . getLeft ( ) ) & & null = = p . getRight ( ) )
| | ( " a " . equals ( p . getLeft ( ) )
& & null ! = p . getRight ( )
& & TibetanMachineWeb . isWylieVowel ( p . getRight ( ) ) ) ) ; // TODO(DLC)[EWTS->Tibetan]: or Unicode wowels? test "a\u0f74" and "a\u0f7e"
// TODO(DLC)[EWTS->Tibetan]: (a.e) is bad, one of (.a) or (a.) is bad
2005-02-21 01:16:10 +00:00
}
/ * * { tsh } , the longest consonant , has 3 characters , so this is
* three . * /
public int maxConsonantLength ( ) { return 3 ; }
2005-07-06 22:26:55 +00:00
/ * * Wowels can be arbitrarily long via stacking . But each
* component is no longer , in characters , than this . [ ~ M ` ] is
* the current winner . * /
public int maxWowelLength ( ) { return 3 ; }
2005-06-20 06:18:00 +00:00
public boolean isUnicodeConsonant ( char ch ) {
2005-07-06 02:19:38 +00:00
return ( ( ch ! = '\u0f48' & & ch > = '\u0f40' & & ch < = '\u0f6a' )
| | ( ch ! = '\u0f98' & & ch > = '\u0f90' & & ch < = '\u0fbc' )
// NOTE: \u0f88 is questionable, but we want EWTS
// [\u0f88+kha] to become "\u0f88\u0f91" and this does
// the trick.
| | ch = = '\u0f88' ) ;
2005-06-20 06:18:00 +00:00
}
public boolean isUnicodeWowel ( char ch ) {
// TODO(DLC)[EWTS->Tibetan]: what about combiners that combine only with digits? TEST
return ( ( ch > = '\u0f71' & & ch < = '\u0f84' )
2005-07-06 22:26:55 +00:00
| | '\u0f39' = = ch
2005-06-20 06:18:00 +00:00
| | isUnicodeWowelThatRequiresAChen ( ch ) ) ;
}
2005-02-21 01:16:10 +00:00
// TODO(DLC)[EWTS->Tibetan]: u,e,i,o? If not, document the special treatment in this function's comment
public boolean isConsonant ( String s ) {
2005-06-20 06:18:00 +00:00
if ( s . length ( ) = = 1 & & isUnicodeConsonant ( s . charAt ( 0 ) ) ) return true ;
if ( aVowel ( ) . equals ( s ) ) return false ; // In EWTS, "a" is both a consonant and a vowel, but we treat it as just a vowel and insert the implied a-chen if you have a TPair ( . a) (TODO(DLC)[EWTS->Tibetan]: right?)
// TODO(DLC)[EWTS->Tibetan]: numbers are consonants?
2005-02-21 01:16:10 +00:00
// TODO(DLC)[EWTS->Tibetan]: just g for now
2005-06-20 06:18:00 +00:00
return TibetanMachineWeb . isWylieChar ( s ) ;
2005-02-21 01:16:10 +00:00
}
public boolean isWowel ( String s ) {
2005-06-20 06:18:00 +00:00
return ( getUnicodeForWowel ( s ) ! = null ) ;
/ * TODO ( DLC ) [ EWTS - > Tibetan ] : test ko + m + e etc .
2005-02-21 01:16:10 +00:00
// TODO(DLC)[EWTS->Tibetan]: all non-consonant combiners? 0f71 0f87 etc.?
2005-06-20 06:18:00 +00:00
if ( s . length ( ) = = 1 & & isUnicodeWowel ( s . charAt ( 0 ) ) ) return true ;
2005-02-21 01:16:10 +00:00
return ( " a " . equals ( s )
| | " e " . equals ( s )
| | " i " . equals ( s )
| | " o " . equals ( s )
| | " u " . equals ( s )
| | " U " . equals ( s )
| | " I " . equals ( s )
| | " A " . equals ( s )
| | " -i " . equals ( s )
| | " -I " . equals ( s )
2005-06-20 06:18:00 +00:00
| | " au " . equals ( s )
| | " ai " . equals ( s )
| | isWowelThatRequiresAChen ( s ) ) ;
// TODO(DLC)[EWTS->Tibetan]:???
* /
2005-02-21 01:16:10 +00:00
}
2005-02-22 04:36:54 +00:00
/**
 * Returns the transliteration of the "a" vowel.
 *
 * @return the one-character string "a"
 */
public String aVowel() {
    return "a";
}
public boolean isPostsuffix ( String s ) {
return ( " s " . equals ( s ) | | " d " . equals ( s ) ) ;
}
public boolean isPrefix ( String l ) {
return ( " ' " . equals ( l )
| | " m " . equals ( l )
| | " b " . equals ( l )
| | " d " . equals ( l )
| | " g " . equals ( l ) ) ;
}
public boolean isSuffix ( String l ) {
return ( " s " . equals ( l )
| | " g " . equals ( l )
| | " d " . equals ( l )
| | " m " . equals ( l )
| | " ' " . equals ( l )
| | " b " . equals ( l )
| | " ng " . equals ( l )
| | " n " . equals ( l )
| | " l " . equals ( l )
| | " r " . equals ( l ) ) ;
}
/**
 * Returns l unchanged, since this is EWTS's traits class and l is
 * therefore already EWTS.
 *
 * @param l the consonant
 * @return l itself
 */
public String getEwtsForConsonant(String l) {
    final String ewts = l;
    return ewts;
}
/**
 * Returns l unchanged, since this is EWTS's traits class and l is
 * therefore already EWTS.
 *
 * @param l the non-consonant, non-wowel transliteration
 * @return l itself
 */
public String getEwtsForOther(String l) {
    final String ewts = l;
    return ewts;
}
/**
 * Returns l unchanged, since this is EWTS's traits class and l is
 * therefore already EWTS.
 *
 * @param l the wowel
 * @return l itself
 */
public String getEwtsForWowel(String l) {
    final String ewts = l;
    return ewts;
}
/**
 * Returns the tsheg-bar scanner for EWTS.
 *
 * @return the result of EWTSTshegBarScanner.instance()
 */
public TTshegBarScanner scanner() {
    return EWTSTshegBarScanner.instance();
}
2005-07-07 02:54:36 +00:00
public void getDuffForWowel ( ArrayList duff , DuffCode preceding , String wowel )
throws IllegalArgumentException
{
2005-06-20 09:30:35 +00:00
// TODO(DLC)[EWTS->Tibetan]: I have no confidence in this! test, test, test.
2005-07-07 01:30:03 +00:00
// TODO(DLC)[EWTS->Tibetan]: ko+o doesn't work. kai+-i doesn't work.
// TODO(DLC)[EWTS->Tibetan]: kai doesn't work.
2005-06-20 09:30:35 +00:00
// Order matters here.
boolean context_added [ ] = new boolean [ ] { false } ;
if ( wowel . equals ( THDLWylieConstants . WYLIE_aVOWEL ) ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . WYLIE_aVOWEL , context_added ) ;
} else {
// TODO(DLC)[EWTS->Tibetan]: test vowel stacking
if ( wowel . indexOf ( THDLWylieConstants . U_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . U_VOWEL , context_added ) ;
}
if ( wowel . indexOf ( THDLWylieConstants . reverse_I_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . reverse_I_VOWEL , context_added ) ;
} else if ( wowel . indexOf ( THDLWylieConstants . I_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . I_VOWEL , context_added ) ;
}
if ( wowel . indexOf ( THDLWylieConstants . A_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . A_VOWEL , context_added ) ;
}
if ( wowel . indexOf ( THDLWylieConstants . ai_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . ai_VOWEL , context_added ) ;
2005-07-07 01:30:03 +00:00
} else if ( wowel . indexOf ( THDLWylieConstants . reverse_i_VOWEL ) > = 0 ) {
2005-06-20 09:30:35 +00:00
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . reverse_i_VOWEL , context_added ) ;
} else if ( wowel . indexOf ( THDLWylieConstants . i_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . i_VOWEL , context_added ) ;
}
if ( wowel . indexOf ( THDLWylieConstants . e_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . e_VOWEL , context_added ) ;
}
if ( wowel . indexOf ( THDLWylieConstants . o_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . o_VOWEL , context_added ) ;
}
2005-07-07 01:30:03 +00:00
if ( wowel . indexOf ( THDLWylieConstants . au_VOWEL ) > = 0 ) {
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . au_VOWEL , context_added ) ;
} else if ( wowel . indexOf ( THDLWylieConstants . u_VOWEL ) > = 0 ) {
2005-06-20 09:30:35 +00:00
TibTextUtils . getVowel ( duff , preceding , THDLWylieConstants . u_VOWEL , context_added ) ;
}
if ( wowel . indexOf ( " ~X " ) > = 0 ) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
duff . add ( TibetanMachineWeb . getGlyph ( " ~X " ) ) ;
} else if ( wowel . indexOf ( " X " ) > = 0 ) { // TODO(DLC)[EWTS->Tibetan]: introduce THDLWylieConstants.blah
duff . add ( TibetanMachineWeb . getGlyph ( " X " ) ) ;
}
}
// FIXME: Use TMW9.61, the "o'i" special combination, when appropriate.
2005-07-07 01:30:03 +00:00
if ( wowel . indexOf ( THDLWylieConstants . BINDU ) > = 0
// TODO(DLC)[EWTS->Tibetan]: This is really ugly... we
// rely on the fact that we know every Wylie wowel that
// contains 'M'. Let's, instead, parse the wowel.
& & wowel . indexOf ( THDLWylieConstants . U0F82 ) < 0
& & wowel . indexOf ( THDLWylieConstants . U0F83 ) < 0 ) {
2005-06-20 09:30:35 +00:00
DuffCode last = null ;
2005-07-06 07:46:21 +00:00
if ( ! context_added [ 0 ] ) {
last = preceding ;
} else if ( duff . size ( ) > 0 ) {
2005-06-20 09:30:35 +00:00
last = ( DuffCode ) duff . get ( duff . size ( ) - 1 ) ;
duff . remove ( duff . size ( ) - 1 ) ; // getBindu will add it back...
// TODO(DLC)[EWTS->Tibetan]: is this okay???? when is a bindu okay to be alone???
}
TibTextUtils . getBindu ( duff , last ) ;
2005-07-07 01:30:03 +00:00
context_added [ 0 ] = true ;
}
if ( ! context_added [ 0 ] ) {
duff . add ( preceding ) ;
2005-06-20 09:30:35 +00:00
}
if ( wowel . indexOf ( 'H' ) > = 0 )
duff . add ( TibetanMachineWeb . getGlyph ( " H " ) ) ;
2005-07-07 01:30:03 +00:00
int ix ;
if ( ( ix = wowel . indexOf ( THDLWylieConstants . WYLIE_TSA_PHRU ) ) > = 0 ) {
// This likely won't look good! TMW has glyphs for [va]
// and [fa], so use that transliteration if you care, not
// [ph^] or [b^].
duff . add ( TibetanMachineWeb . getGlyph ( THDLWylieConstants . WYLIE_TSA_PHRU ) ) ;
StringBuffer sb = new StringBuffer ( wowel ) ;
sb . replace ( ix , ix + THDLWylieConstants . WYLIE_TSA_PHRU . length ( ) , " " ) ;
wowel = sb . toString ( ) ;
}
if ( ( ix = wowel . indexOf ( THDLWylieConstants . U0F82 ) ) > = 0 ) {
duff . add ( TibetanMachineWeb . getGlyph ( THDLWylieConstants . U0F82 ) ) ;
StringBuffer sb = new StringBuffer ( wowel ) ;
sb . replace ( ix , ix + THDLWylieConstants . U0F82 . length ( ) , " " ) ;
wowel = sb . toString ( ) ;
}
if ( ( ix = wowel . indexOf ( THDLWylieConstants . U0F83 ) ) > = 0 ) {
duff . add ( TibetanMachineWeb . getGlyph ( THDLWylieConstants . U0F83 ) ) ;
StringBuffer sb = new StringBuffer ( wowel ) ;
sb . replace ( ix , ix + THDLWylieConstants . U0F83 . length ( ) , " " ) ;
wowel = sb . toString ( ) ;
}
2005-06-20 09:30:35 +00:00
// TODO(DLC)[EWTS->Tibetan]: verify that no part of wowel is discarded! acip does that. 'jam~X I think we screw up, e.g.
// TODO(DLC)[EWTS->Tibetan]:: are bindus are screwed up in the unicode output? i see (with tmuni font) lone bindus without glyphs to stack on
2005-02-22 04:36:54 +00:00
}
2005-06-20 06:18:00 +00:00
/**
 * Returns the Unicode string for the EWTS wowel, or null if wowel is
 * not a valid wowel.  The bare vowel "a" maps to the empty string;
 * everything else is delegated to helpGetUnicodeForWowel.
 *
 * @param wowel the EWTS wowel, possibly stacked with "+"
 * @return the Unicode for wowel, "" for "a", or null if invalid
 */
public String getUnicodeForWowel(String wowel) {
    if ("a".equals(wowel))
        return "";
    return helpGetUnicodeForWowel(wowel);
}
private String helpGetUnicodeForWowel ( String wowel ) {
if ( " a " . equals ( wowel ) )
return null ; // ko+a+e is invalid, e.g.
if ( wowel . length ( ) = = 1 & & isUnicodeWowel ( wowel . charAt ( 0 ) ) )
return wowel ;
// handle o+u, etc.
int i ;
if ( ( i = wowel . indexOf ( " + " ) ) > = 0 ) {
// recurse.
// Chris Fynn says \u0f7c\u0f7c is different from \u0f7d.
// So o+o is not the same as au. e+e is not the same as
// ai.
String left = helpGetUnicodeForWowel ( wowel . substring ( 0 , i ) ) ;
String right = helpGetUnicodeForWowel ( wowel . substring ( i + 1 ) ) ;
if ( null ! = left & & null ! = right )
return left + right ;
else
return null ;
} else {
// Handle vowels. (TODO(dchandler): tibwn.ini has this
// info, use that instead of duplicating it in this code.)
if ( " i " . equals ( wowel ) ) return " \ u0f72 " ;
if ( " u " . equals ( wowel ) ) return " \ u0f74 " ;
if ( " A " . equals ( wowel ) ) return " \ u0f71 " ;
if ( " U " . equals ( wowel ) ) return " \ u0f71 \ u0f74 " ; // \u0f75 is discouraged
if ( " e " . equals ( wowel ) ) return " \ u0f7a " ;
if ( " o " . equals ( wowel ) ) return " \ u0f7c " ;
if ( " -i " . equals ( wowel ) ) return " \ u0f80 " ;
if ( " ai " . equals ( wowel ) ) return " \ u0f7b " ;
if ( " au " . equals ( wowel ) ) return " \ u0f7d " ;
if ( " -I " . equals ( wowel ) ) return " \ u0f81 " ;
if ( " I " . equals ( wowel ) ) return " \ u0f71 \ u0f72 " ; // \u0f73 is discouraged
2005-07-06 22:26:55 +00:00
// TODO(DLC)[EWTS->Tibetan]: test, test, test.
2005-06-20 06:18:00 +00:00
if ( " M " . equals ( wowel ) ) return " \ u0f7e " ;
if ( " H " . equals ( wowel ) ) return " \ u0f7f " ;
if ( " ? " . equals ( wowel ) ) return " \ u0f84 " ;
if ( " ~M " . equals ( wowel ) ) return " \ u0f83 " ;
if ( " ~M` " . equals ( wowel ) ) return " \ u0f82 " ;
if ( " X " . equals ( wowel ) ) return " \ u0f37 " ;
if ( " ~X " . equals ( wowel ) ) return " \ u0f35 " ;
2005-07-06 22:26:55 +00:00
if ( " ^ " . equals ( wowel ) ) return " \ u0f39 " ;
2005-06-20 06:18:00 +00:00
return null ;
}
}
public String getUnicodeFor ( String l , boolean subscribed ) {
2005-06-20 09:30:35 +00:00
2005-06-20 06:18:00 +00:00
// First, handle "\u0f71\u0f84\u0f86", "", "\u0f74", etc.
{
boolean already_done = true ;
for ( int i = 0 ; i < l . length ( ) ; i + + ) {
2005-06-20 09:30:35 +00:00
char ch = l . charAt ( i ) ;
if ( ( ch < '\u0f00' | | ch > '\u0fff' )
2005-07-06 02:19:38 +00:00
& & SAUVASTIKA ! = ch
& & SWASTIKA ! = ch
& & ( ch < PUA_MIN | | ch > PUA_MAX ) // TODO(DLC)[EWTS->Tibetan]: give a warning, though? PUA isn't specified by the unicode standard after all.
2005-06-20 09:30:35 +00:00
& & '\n' ! = ch
& & '\r' ! = ch ) {
// TODO(DLC)[EWTS->Tibetan]: Is this the place
// where we want to interpret how newlines work???
2005-06-20 06:18:00 +00:00
already_done = false ;
break ;
}
}
if ( already_done )
return l ; // TODO(dchandler): \u0fff etc. are not valid code points, though. Do we handle that well?
}
// TODO(DLC)[EWTS->Tibetan]:: vowels !subscribed could mean (a . i)???? I doubt it but test "i"->"\u0f68\u0f72" etc.
if ( subscribed ) {
if ( " R " . equals ( l ) ) return " \ u0fbc " ;
if ( " Y " . equals ( l ) ) return " \ u0fbb " ;
if ( " W " . equals ( l ) ) return " \ u0fba " ;
// g+h etc. should not be inputs to this function, but for
// completeness they're here.
if ( " k " . equals ( l ) ) return " \ u0F90 " ;
if ( " kh " . equals ( l ) ) return " \ u0F91 " ;
if ( " g " . equals ( l ) ) return " \ u0F92 " ;
if ( " g+h " . equals ( l ) ) return " \ u0F93 " ;
if ( " ng " . equals ( l ) ) return " \ u0F94 " ;
if ( " c " . equals ( l ) ) return " \ u0F95 " ;
if ( " ch " . equals ( l ) ) return " \ u0F96 " ;
if ( " j " . equals ( l ) ) return " \ u0F97 " ;
if ( " ny " . equals ( l ) ) return " \ u0F99 " ;
if ( " T " . equals ( l ) ) return " \ u0F9A " ;
if ( " Th " . equals ( l ) ) return " \ u0F9B " ;
if ( " D " . equals ( l ) ) return " \ u0F9C " ;
if ( " D+h " . equals ( l ) ) return " \ u0F9D " ;
if ( " N " . equals ( l ) ) return " \ u0F9E " ;
if ( " t " . equals ( l ) ) return " \ u0F9F " ;
if ( " th " . equals ( l ) ) return " \ u0FA0 " ;
if ( " d " . equals ( l ) ) return " \ u0FA1 " ;
if ( " d+h " . equals ( l ) ) return " \ u0FA2 " ;
if ( " n " . equals ( l ) ) return " \ u0FA3 " ;
if ( " p " . equals ( l ) ) return " \ u0FA4 " ;
if ( " ph " . equals ( l ) ) return " \ u0FA5 " ;
if ( " b " . equals ( l ) ) return " \ u0FA6 " ;
if ( " b+h " . equals ( l ) ) return " \ u0FA7 " ;
if ( " m " . equals ( l ) ) return " \ u0FA8 " ;
if ( " ts " . equals ( l ) ) return " \ u0FA9 " ;
if ( " tsh " . equals ( l ) ) return " \ u0FAA " ;
if ( " dz " . equals ( l ) ) return " \ u0FAB " ;
if ( " dz+h " . equals ( l ) ) return " \ u0FAC " ;
if ( " w " . equals ( l ) ) return " \ u0FAD " ; // TODO(DLC)[EWTS->Tibetan]:: ???
if ( " zh " . equals ( l ) ) return " \ u0FAE " ;
if ( " z " . equals ( l ) ) return " \ u0FAF " ;
if ( " ' " . equals ( l ) ) return " \ u0FB0 " ;
if ( " y " . equals ( l ) ) return " \ u0FB1 " ;
if ( " r " . equals ( l ) ) return " \ u0FB2 " ;
if ( " l " . equals ( l ) ) return " \ u0FB3 " ;
if ( " sh " . equals ( l ) ) return " \ u0FB4 " ;
if ( " Sh " . equals ( l ) ) return " \ u0FB5 " ;
if ( " s " . equals ( l ) ) return " \ u0FB6 " ;
if ( " h " . equals ( l ) ) return " \ u0FB7 " ;
if ( " a " . equals ( l ) ) return " \ u0FB8 " ;
if ( " k+Sh " . equals ( l ) ) return " \ u0FB9 " ;
2005-07-06 22:26:55 +00:00
if ( " f " . equals ( l ) ) return " \ u0FA5 \ u0F39 " ;
if ( " v " . equals ( l ) ) return " \ u0FA6 \ u0F39 " ;
2005-06-20 06:18:00 +00:00
return null ;
} else {
if ( " R " . equals ( l ) ) return " \ u0f6a " ;
if ( " Y " . equals ( l ) ) return " \ u0f61 " ;
if ( " W " . equals ( l ) ) return " \ u0f5d " ;
2005-07-10 05:01:03 +00:00
if ( " // " . equals ( l ) ) return " \ u0f0e " ;
2005-06-20 06:18:00 +00:00
if ( ! TibetanMachineWeb . isKnownHashKey ( l ) ) {
2005-07-06 02:19:38 +00:00
// System.err.println("Getting unicode for the following is hard: '"
// + l + "' (pretty string: '"
// + UnicodeUtils.unicodeStringToPrettyString(l)
// + "'");
2005-06-20 06:18:00 +00:00
ThdlDebug . noteIffyCode ( ) ;
return null ;
}
String s = TibetanMachineWeb . getUnicodeForWylieForGlyph ( l ) ;
if ( null = = s )
ThdlDebug . noteIffyCode ( ) ;
return s ;
}
}
/**
 * Returns the short name of this transliteration scheme.
 *
 * @return "EWTS"
 */
public String shortTranslitName() {
    return "EWTS";
}
private boolean pairHasBadWowel ( TPair p ) {
return ( null ! = p . getRight ( )
& & ! disambiguator ( ) . equals ( p . getRight ( ) )
& & ! " + " . equals ( p . getRight ( ) )
& & null = = getUnicodeForWowel ( p . getRight ( ) ) ) ;
}
public boolean isClearlyIllegal ( TPair p ) {
if ( pairHasBadWowel ( p ) ) return true ;
if ( p . getLeft ( ) = = null
& & ( p . getRight ( ) = = null | |
( ! disambiguator ( ) . equals ( p . getRight ( ) )
& & ! isWowel ( p . getRight ( ) ) ) ) )
return true ;
if ( " + " . equals ( p . getLeft ( ) ) )
return true ;
if ( p . getLeft ( ) ! = null & & isWowel ( p . getLeft ( ) )
& & ! aVowel ( ) . equals ( p . getLeft ( ) ) ) // achen
return true ;
return false ;
}
/**
 * Breaks the tsheg bar tt into chunks by delegating to
 * TPairListFactory.breakEWTSIntoChunks.
 *
 * @param tt the EWTS tsheg bar
 * @param sh must be false
 * @return the result of TPairListFactory.breakEWTSIntoChunks(tt)
 * @throws IllegalArgumentException if sh is true, or if processing tt
 * overflowed the stack or exhausted memory; in the latter two cases
 * the original error is attached as the cause
 */
public TPairList[] breakTshegBarIntoChunks(String tt, boolean sh) {
    if (sh) throw new IllegalArgumentException("Don't do that, silly!");
    try {
        return TPairListFactory.breakEWTSIntoChunks(tt);
    } catch (StackOverflowError e) {
        IllegalArgumentException tooLarge
            = new IllegalArgumentException("Input too large[1]: " + tt);
        tooLarge.initCause(e); // keep the original error for diagnosis
        throw tooLarge;
    } catch (OutOfMemoryError e) {
        IllegalArgumentException tooLarge
            = new IllegalArgumentException("Input too large[2]: " + tt);
        tooLarge.initCause(e); // keep the original error for diagnosis
        throw tooLarge;
    }
}
/**
 * Reports whether this traits object describes ACIP.
 *
 * @return false, always
 */
public boolean isACIP() {
    final boolean acip = false;
    return acip;
}
/**
 * Reports whether a vowel standing alone implies an a-chen.
 *
 * @return true, always
 */
public boolean vowelAloneImpliesAChen() {
    final boolean implied = true;
    return implied;
}
/**
 * Reports whether vowels may stack in this transliteration scheme.
 *
 * @return true, always
 */
public boolean vowelsMayStack() {
    final boolean stackable = true;
    return stackable;
}
public boolean isWowelThatRequiresAChen ( String s ) {
// TODO(DLC)[EWTS->Tibetan]: fix me!
return ( ( s . length ( ) = = 1 & & ( isUnicodeWowelThatRequiresAChen ( s . charAt ( 0 ) )
2005-07-06 22:26:55 +00:00
| | " ?MHX^ " . indexOf ( s . charAt ( 0 ) ) > = 0 ) )
2005-06-20 06:18:00 +00:00
| | " ~X " . equals ( s )
| | " ~M " . equals ( s )
| | " ~M` " . equals ( s )
) ;
}
public boolean isUnicodeWowelThatRequiresAChen ( char ch ) {
// TODO(DLC)[EWTS->Tibetan]: ask if 18 19 3e 3f combine only with digits
2005-07-06 22:55:19 +00:00
return " \ u0f39 \ u0f35 \ u0f37 \ u0f18 \ u0f19 \ u0f3e \ u0f3f \ u0f86 \ u0f87 \ u0fc6 " . indexOf ( ch ) > = 0 ;
2005-06-20 06:18:00 +00:00
}
public boolean couldBeValidStack ( TPairList pl ) {
StringBuffer hashKey = new StringBuffer ( ) ;
boolean allHavePlus = true ;
for ( int i = 0 ; i < pl . size ( ) ; i + + ) {
if ( i + 1 < pl . size ( ) & & ! " + " . equals ( pl . get ( i ) . getRight ( ) ) )
allHavePlus = false ;
if ( 0 ! = hashKey . length ( ) )
hashKey . append ( '-' ) ;
hashKey . append ( pl . get ( i ) . getLeft ( ) ) ;
}
return ( allHavePlus
| | TibetanMachineWeb . hasGlyph ( hashKey . toString ( ) ) ) ; // TODO(DLC)[EWTS->Tibetan]: test with smra and tsma and bdgya
}
2005-07-06 02:19:38 +00:00
/**
 * Reports whether stacking must be written explicitly in this
 * transliteration scheme.
 *
 * @return true, always
 */
public boolean stackingMustBeExplicit() {
    final boolean explicit = true;
    return explicit;
}
/**
 * Returns the EWTS transliteration of U+0F7F.
 *
 * @return "H"
 */
public String U0F7F() {
    return "H";
}
/**
 * Returns the EWTS transliteration of U+0F35.
 *
 * @return "~X"
 */
public String U0F35() {
    return "~X";
}
/**
 * Returns the EWTS transliteration of U+0F37.
 *
 * @return "X"
 */
public String U0F37() {
    return "X";
}
/** The EWTS standard mentions this character specifically.  See
    http://www.symbols.com/encyclopedia/15/155.html to learn about
    its meaning as relates to Buddhism.
*/
static final char SAUVASTIKA = '\u534d' ;
/** The EWTS standard mentions this character specifically.  See
    http://www.symbols.com/encyclopedia/15/151.html to learn about
    its meaning as relates to Buddhism.
*/
static final char SWASTIKA = '\u5350' ;
/** EWTS has some glyphs not specified by Unicode in the
 *  private-use area (PUA).  EWTS puts them in the range [PUA_MIN,
 *  PUA_MAX].  (Note that \uf042 is the highest in use as of July
 *  2, 2005.) */
static final char PUA_MIN = '\uf021' ;
/** EWTS has some glyphs not specified by Unicode in the
 *  private-use area (PUA).  EWTS puts them in the range [PUA_MIN,
 *  PUA_MAX].  (Note that \uf042 is the highest in use as of July
 *  2, 2005.) */
static final char PUA_MAX = '\uf0ff' ;
2005-02-21 01:16:10 +00:00
}