/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.

The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
All Rights Reserved.

Contributor(s): ______________________________________.
*/
package org.thdl.tib.text ;
/** An ordered pair consisting of a Tibetan grapheme cluster's (see
    {@link org.thdl.tib.text.tshegbar.UnicodeGraphemeCluster} for a
    definition of the term) classification and its
    context-insensitive THDL Extended Wylie representation.  NOTE
    WELL: this is not a real grapheme cluster; I'm misusing the term
    (FIXME).  It's actually whole or part of one.  It's part of one
    when this is U+0F7F alone.

    @author David Chandler */
public class TGCPair implements THDLWylieConstants {
2003-08-10 19:02:56 +00:00
public static final int OTHER = 1 ;
// a standalone achen would fall into this category:
public static final int CONSONANTAL_WITHOUT_VOWEL = 2 ;
public static final int CONSONANTAL_WITH_VOWEL = 3 ;
public static final int LONE_VOWEL = 4 ;
public static final int SANSKRIT_WITHOUT_VOWEL = 5 ;
public static final int SANSKRIT_WITH_VOWEL = 6 ;
2003-08-31 20:38:28 +00:00
/ * * Returns a human - readable ( well , programmer - readable )
representation of one of the public enumerations in this
class . * /
public static final String enumToString ( int cls ) {
if ( OTHER = = cls ) return " OTHER " ;
if ( LONE_VOWEL = = cls ) return " LONE_VOWEL " ;
if ( SANSKRIT_WITH_VOWEL = = cls ) return " SANSKRIT_WITH_VOWEL " ;
if ( SANSKRIT_WITHOUT_VOWEL = = cls ) return " SANSKRIT_WITHOUT_VOWEL " ;
if ( CONSONANTAL_WITH_VOWEL = = cls ) return " CONSONANTAL_WITH_VOWEL " ;
if ( CONSONANTAL_WITHOUT_VOWEL = = cls ) return " CONSONANTAL_WITHOUT_VOWEL " ;
if ( TYPE_OTHER = = cls ) return " TYPE_OTHER " ;
if ( TYPE_SANSKRIT = = cls ) return " TYPE_SANSKRIT " ;
if ( TYPE_TIBETAN = = cls ) return " TYPE_TIBETAN " ;
return null ;
}
2003-08-23 22:03:37 +00:00
public static final int TYPE_OTHER = 31 ;
public static final int TYPE_SANSKRIT = 32 ;
public static final int TYPE_TIBETAN = 33 ;
// Sanskrit or Tibetan consonant, or number, or oddball:
private String consonantWylie ;
private String vowelWylie ;
public String getConsonantWylie ( ) {
return consonantWylie ;
}
public String getVowelWylie ( ) {
return vowelWylie ;
}
/** Cludge. */
public void setWylie ( String x ) {
consonantWylie = x ;
vowelWylie = null ;
}
public String getWylie ( ) {
2003-10-18 03:04:47 +00:00
return getWylie ( null ) ;
2003-09-12 05:06:37 +00:00
}
2003-10-18 03:04:47 +00:00
/ * * Returns the EWTS for this pair , given that , if and only if
this pair is part of an appendaged tsheg bar like ma ' ongs or
pa ' am or spre ' ur , previousTranslitIfAppendaged is non - null .
If it is non - null , then it must be equal to the EWTS
transliteration of the previous pair .
@see # getACIP ( String )
* /
public String getWylie ( String previousTranslitIfAppendaged ) {
2003-11-23 01:22:27 +00:00
if ( ACHEN . equals ( consonantWylie ) ) {
// Unlike ACIP, EWTS uses e for achen with e vowel, not ae.
if ( null = = vowelWylie )
return ACHEN ;
else
return vowelWylie ;
}
2003-08-23 22:03:37 +00:00
StringBuffer b = new StringBuffer ( ) ;
if ( consonantWylie ! = null ) {
2003-10-18 03:04:47 +00:00
// Think of pa'am... we want 'am, not 'm; 'ang, not 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
if ( null ! = previousTranslitIfAppendaged
& & " ' " . equals ( previousTranslitIfAppendaged ) ) {
b . append ( " a " ) ;
}
2003-09-12 05:06:37 +00:00
2003-08-23 22:03:37 +00:00
// we may have {p-y}, but the user wants to see {py}.
for ( int i = 0 ; i < consonantWylie . length ( ) ; i + + ) {
char ch = consonantWylie . charAt ( i ) ;
if ( '-' ! = ch )
b . append ( ch ) ;
}
}
if ( vowelWylie ! = null )
b . append ( vowelWylie ) ;
return b . toString ( ) ;
}
2003-09-02 06:39:33 +00:00
public String getACIP ( ) {
2003-10-18 03:04:47 +00:00
return getACIP ( null ) ;
2003-09-12 05:06:37 +00:00
}
2004-04-14 05:44:51 +00:00
/ * * Like { @link # getWylie ( String ) } but for ACIP transliteration ,
not EWTS . * /
2003-10-18 03:04:47 +00:00
public String getACIP ( String previousTranslitIfAppendaged ) {
2003-09-02 06:39:33 +00:00
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
StringBuffer b = new StringBuffer ( ) ;
if ( consonantWylie ! = null ) {
2003-09-12 05:06:37 +00:00
String consonantACIP
2004-04-14 05:44:51 +00:00
= null ;
if ( " w " . equals ( consonantWylie )
& & ( SANSKRIT_WITHOUT_VOWEL = = classification
| | SANSKRIT_WITH_VOWEL = = classification ) )
consonantACIP = " V " ;
else
consonantACIP
= org . thdl . tib . text . ttt . ACIPRules . getACIPForEWTS ( consonantWylie ) ;
2003-09-12 05:06:37 +00:00
if ( null = = consonantACIP ) {
2004-04-14 05:44:51 +00:00
if ( null ! = consonantWylie & & consonantWylie . startsWith ( " R+ " ) )
return TibetanMachineWeb . getTMWToACIPErrorString ( " glyph with THDL Extended Wylie " + consonantWylie , " because the ACIP R+... could imply the short superscribed form, but this most likely intends the full form (i.e., Unicode character U+0F6A) " ) ;
return TibetanMachineWeb . getTMWToACIPErrorString ( " glyph with THDL Extended Wylie " + consonantWylie , " " ) ;
2003-09-12 05:06:37 +00:00
} else {
2004-04-14 05:44:51 +00:00
// Think of pa'am... we want 'am, not 'm; 'ang, not
// 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
2003-10-18 03:04:47 +00:00
if ( null ! = previousTranslitIfAppendaged
& & " ' " . equals ( previousTranslitIfAppendaged ) ) {
b . append ( " A " ) ;
}
2003-09-12 05:06:37 +00:00
// we may have {P-Y}, but the user wants to see {PY}.
for ( int i = 0 ; i < consonantACIP . length ( ) ; i + + ) {
char ch = consonantACIP . charAt ( i ) ;
if ( '-' ! = ch )
b . append ( ch ) ;
}
2003-09-02 06:39:33 +00:00
}
}
if ( vowelWylie ! = null ) {
2003-09-12 05:06:37 +00:00
String vowelACIP
= org . thdl . tib . text . ttt . ACIPRules . getACIPForEWTS ( vowelWylie ) ;
if ( null = = vowelACIP ) {
2004-04-14 05:44:51 +00:00
return TibetanMachineWeb . getTMWToACIPErrorString ( " glyph with THDL Extended Wylie " + vowelWylie , " " ) ;
2003-09-12 05:06:37 +00:00
} else {
b . append ( vowelACIP ) ;
}
2003-09-02 06:39:33 +00:00
}
return b . toString ( ) ;
}
2003-08-10 19:02:56 +00:00
public int classification ;
2003-08-23 22:03:37 +00:00
/ * * Constructs a new TGCPair with ( Tibetan or Sanskrit ) consonant
* consonantWylie and vowel vowelWylie . Use
* classification = = TYPE_OTHER for numbers , lone vowels , marks ,
* etc . Use classification = = TYPE_TIBETAN for Tibetan ( not
* Tibetanized Sanskrit ) and classification = TYPE_SANSKRIT for
* Tibetanized Sanskrit . * /
public TGCPair ( String consonantWylie , String vowelWylie , int classification ) {
if ( " " . equals ( vowelWylie ) )
vowelWylie = null ;
// Technically, we don't need the following check, but it's
// nice for consistency's sake.
if ( " " . equals ( consonantWylie ) )
consonantWylie = null ;
// DLC FIXME: for speed, make these assertions:
if ( classification ! = TYPE_OTHER
& & classification ! = TYPE_TIBETAN
& & classification ! = TYPE_SANSKRIT ) {
throw new IllegalArgumentException ( " Bad classification " + classification + " . " ) ;
}
int realClassification = - 37 ;
if ( vowelWylie = = null & & classification = = TYPE_TIBETAN )
realClassification = CONSONANTAL_WITHOUT_VOWEL ;
if ( vowelWylie ! = null & & classification = = TYPE_TIBETAN )
realClassification = CONSONANTAL_WITH_VOWEL ;
if ( vowelWylie = = null & & classification = = TYPE_SANSKRIT )
realClassification = SANSKRIT_WITHOUT_VOWEL ;
if ( vowelWylie ! = null & & classification = = TYPE_SANSKRIT )
realClassification = SANSKRIT_WITH_VOWEL ;
if ( consonantWylie = = null ) {
if ( classification ! = TYPE_OTHER )
throw new IllegalArgumentException ( " That's the very definition of a lone vowel. " ) ;
realClassification = LONE_VOWEL ;
} else {
if ( classification = = TYPE_OTHER )
realClassification = OTHER ;
}
this . consonantWylie = consonantWylie ;
2003-09-12 05:06:37 +00:00
if ( null ! = vowelWylie ) {
if ( vowelWylie . equals ( " iA " ) | | vowelWylie . equals ( " Ai " ) )
vowelWylie = " I " ;
if ( vowelWylie . equals ( " uA " ) | | vowelWylie . equals ( " Au " ) )
vowelWylie = " U " ;
}
2003-08-23 22:03:37 +00:00
this . vowelWylie = vowelWylie ;
this . classification = realClassification ;
2003-08-10 19:02:56 +00:00
}
2003-08-23 22:03:37 +00:00
2003-08-10 19:02:56 +00:00
public String toString ( ) {
2003-08-23 22:03:37 +00:00
return " <TGCPair wylie= " + getWylie ( ) + " classification= "
2003-08-10 19:02:56 +00:00
+ classification + " /> " ;
}
}