Jskad/source/org/thdl/tib/text/TibetanSyllable.java
eg3p 9eedfcd909 This is Tashi's TibetanSyllable class for sorting Wylie Tibetan.
It does not have many methods for determining the root letter, suffix,
and so on, but these should be easy to add. David, please use this
class to the extent that it and your new work overlap.
2002-12-05 01:48:41 +00:00

1074 lines
31 KiB
Java

/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL).
Copyright 2001 Tashi Tsering
All Rights Reserved
Contributor(s): ______________________________________.
*/
/*
//**************************************************************************
//
// This is the Java version of the TibetanSyllable class, which represents a
// Tibetan syllable written in the Wylie transliteration system. Using this class,
// Tibetan syllables can be compared, and Tibetan syllables, words, phrases and
// sentences can be sorted by comparing them syllable by syllable. The order of the
// consonants and the order of the vowels are based on "tshig mdzod chen mo"
// (The Big Dictionary).
//
//
// Author: Tashi Tsering
// Date: 11/27/2002
// Written at: University of Virginia
//
//***********************************************************************************
//The interface of the class of TibetanSyllable:
//
//class name: TibetanSyllable
//
//constructor: TibetanSyllable ( String S )
// String S is the representation of a Tibetan syllable by string.
//
//method:
// int CompareWith( TibetanSyllable thatSyllable )
//
// return: 0 if this syllable is the same as thatSyllable;
// 1 if this syllable is bigger than thatSyllable,
// i.e. this syllable goes after thatSyllable in dictionary order;
// -1 if this syllable is less than thatSyllable,
// i.e. this syllable goes before thatSyllable in dictionary order.
// Invalid syllables are treated as the biggest syllables: they always
// sort after valid syllables.
//************************************************************************************
//
*/
package org.thdl.tib.text;
/**
 * A single syllable written in Wylie transliteration, decomposed into the
 * slots used for dictionary ordering.
 *
 * <p>The constructor splits the text into Wylie symbols and distributes them
 * over {@link #Components}. A syllable that cannot be parsed is marked invalid:
 * every slot holds the string "invalid", which ranks after every real symbol,
 * so invalid syllables compare as bigger than valid ones.
 *
 * <p>Known limitations kept from the original design: suffixes are not
 * validated, components following the second suffix are ignored, and in a
 * two-vowel Tibetan syllable the components after a vowel are only recorded
 * for a group that has no consonant before its vowel.
 */
class TibetanSyllable {
    /** Marker stored in a slot whose component is not present. */
    private static final String ABSENT = "$";
    /** Marker stored in every slot of a syllable that could not be parsed. */
    private static final String INVALID = "invalid";

    /** Number of rows in {@link #Components}: one per vowel group. */
    private static final int MAX_VOWEL_GROUPS = 10;
    /** Number of slots in each row of {@link #Components}. */
    private static final int SLOTS_PER_GROUP = 20;
    /** Length of the longest Wylie symbol ("tsh"). */
    private static final int MAX_SYMBOL_LENGTH = 3;

    // Slot positions inside one row of Components.
    private static final int BASE = 0;
    private static final int SUPERSCRIPT = 1;
    private static final int PREFIX = 2;
    private static final int SUBSCRIPT = 3;
    private static final int VOWEL = 4;
    private static final int SUFFIX = 5;
    private static final int SECOND_SUFFIX = 6;

    private static final String[] TIBETAN_VOWELS = { "a", "i", "u", "e", "o" };
    private static final String[] PREFIXES = { "g", "d", "b", "m", "'" };
    private static final String[] BASE_LETTERS = {
        "k", "kh", "g", "ng", "c", "ch", "j", "ny", "t", "th", "d", "n",
        "p", "ph", "b", "m", "ts", "tsh", "dz", "w", "zh", "z", "'", "y",
        ".y", // Special for making "g.ya" different from "gya".
        "r", "l", "sh", "s", "h", "a"
    };
    private static final String[] SUPERSCRIPTS = { "r", "l", "s" };
    private static final String[] SUBSCRIPTS = { "w", "y", "r", "l" };
    private static final String[] SUFFIXES = { "g", "ng", "d", "n", "b", "m", "'", "r", "l", "s" };
    private static final String[] SANSKRIT_CONSONANTS = { "T", "Th", "D", "N", "Sh", "M", "`", "f", "v" };
    private static final String[] SANSKRIT_VOWELS = { "A", "I", "U", "-i", "-I", "ai", "au" };

    // Base letters that may follow each prefix.
    private static final String[] BASES_AFTER_PREFIX_G = { "c", "ny", "t", "d", "n", "ts", "zh", "z", "sh", "s", ".y" };
    private static final String[] BASES_AFTER_PREFIX_D = { "k", "p", "g", "b", "ng", "m" };
    private static final String[] BASES_AFTER_PREFIX_B = { "c", "g", "t", "d", "ts", "zh", "z", "sh", "s", "k" };
    private static final String[] BASES_AFTER_PREFIX_M = { "kh", "ch", "th", "tsh", "g", "j", "d", "dz", "ng", "ny", "n" };
    private static final String[] BASES_AFTER_PREFIX_A = { "kh", "ch", "th", "ph", "tsh", "g", "j", "d", "b", "dz" };

    // Base letters that may carry each subscript.
    private static final String[] BASES_OVER_SUBSCRIPT_Y = { "k", "kh", "g", "p", "ph", "b", "m" };
    private static final String[] BASES_OVER_SUBSCRIPT_R = { "k", "t", "p", "kh", "ph", "g", "d", "b", "h", "m", "s" };
    private static final String[] BASES_OVER_SUBSCRIPT_L = { "k", "g", "b", "r", "s", "z" };
    private static final String[] BASES_OVER_SUBSCRIPT_W = { "k", "kh", "g", "ny", "d", "ch", "zh", "z", "r", "l", "sh", "s", "h" };

    // Base letters that may stand under each superscript.
    private static final String[] BASES_UNDER_SUPERSCRIPT_R = { "k", "t", "ts", "g", "j", "d", "b", "dz", "ng", "ny", "n", "m" };
    private static final String[] BASES_UNDER_SUPERSCRIPT_L = { "k", "c", "t", "p", "g", "j", "d", "b", "ng", "h" };
    private static final String[] BASES_UNDER_SUPERSCRIPT_S = { "k", "t", "p", "ts", "g", "d", "b", "ng", "ny", "n", "m" };

    // Collation table: RANKED_SYMBOLS[k] sorts with rank SYMBOL_RANKS[k].
    // "y" and ".y" deliberately share a rank. Any symbol without an entry
    // (this includes the Sanskrit consonants M, `, f and v) gets UNKNOWN_RANK.
    private static final String[] RANKED_SYMBOLS = {
        "$",
        "k", "kh", "g", "ng",
        "c", "ch", "j", "ny",
        "T", "Th", "D", "N",
        "t", "th", "d", "n",
        "p", "ph", "b", "m",
        "ts", "tsh", "dz", "w",
        "zh", "z", "'", "y", ".y",
        "r", "l", "sh", "Sh", "s", "h", "a",
        "A", "i", "I", "-i", "-I", "u", "U",
        "e", "ai", "o", "au",
        "invalid"
    };
    private static final int[] SYMBOL_RANKS = {
        0,
        1, 2, 3, 4,
        5, 6, 7, 8,
        9, 10, 11, 12,
        13, 14, 15, 16,
        17, 18, 19, 20,
        21, 22, 23, 24,
        25, 26, 27, 28, 28,
        29, 30, 31, 32, 33, 34, 35,
        42, 43, 44, 45, 46, 47, 48,
        49, 50, 51, 52,
        100
    };
    private static final int UNKNOWN_RANK = 100;

    String theSyllable; // The character String of the syllable.
    boolean TibetanSyllableFlag; // True for Tibetan syllable, false for Sanskrit syllable.
    int nComponents; // Number of Wylie symbols in the syllable (the stacking mark "+" is not counted).
    int nVowels; // The number of vowels in the syllable (1 for an invalid syllable).
    String[][] Components; // One row per vowel group; each row holds Wylie symbols:
    // Components[g][0]-----Base letter
    // Components[g][1]-----Superscript
    // Components[g][2]-----Prefix
    // Components[g][3]-----Subscript
    // Components[g][4]-----Vowel
    // Components[g][5]-----Suffix
    // Components[g][6]-----Second suffix
    // For Sanskrit syllables, slots 5 and up hold the consonants preceding the vowel.
    // Absent slots hold "$"; every slot of an invalid syllable holds "invalid".

    /**
     * Builds the syllable from its Wylie text and parses it immediately.
     *
     * @param s the Wylie text; leading and trailing spaces are dropped, and
     *          {@code null} is treated as an empty (hence invalid) syllable
     */
    TibetanSyllable(String s) {
        theSyllable = stripSpaces(s);
        ItsComponents();
    }

    /**
     * Compares this syllable with another in dictionary order.
     *
     * @param s the syllable to compare against
     * @return 1 if this syllable sorts after {@code s}, -1 if it sorts before,
     *         0 if every compared slot has the same rank
     */
    int CompareWith(TibetanSyllable s) {
        String[][] other = s.GetComponents();
        int groups = Math.max(nVowels, s.GetnVowels());
        groups = Math.min(groups, Math.min(Components.length, other.length));
        for (int g = 0; g < groups; g++) {
            // Compare the whole row: Sanskrit syllables can use slots beyond index 9.
            int slots = Math.min(Components[g].length, other[g].length);
            for (int k = 0; k < slots; k++) {
                int mine = ValueOfTibetanCharacter(Components[g][k]);
                int theirs = ValueOfTibetanCharacter(other[g][k]);
                if (mine > theirs) {
                    return 1; // This syllable is bigger than that syllable s.
                }
                if (mine < theirs) {
                    return -1; // This syllable is smaller than that syllable s.
                }
            }
        }
        return 0; // They are the same syllable.
    }

    /** Returns the base letter of the first vowel group. */
    String BaseLetter() {
        return Components[0][0];
    }

    /** Returns true for a Tibetan syllable, false for a Sanskrit one. */
    boolean IsTibetanSyllable() {
        return TibetanSyllableFlag;
    }

    /** Returns the Wylie text of the syllable. */
    String GetTheSyllable() {
        return theSyllable;
    }

    /**
     * Replaces the Wylie text and re-parses it, so that the components, counts
     * and flags always describe the current text.
     *
     * @param s the new Wylie text, normalised the same way as in the constructor
     */
    void SetTheSyllable(String s) {
        theSyllable = stripSpaces(s);
        ItsComponents();
    }

    /** Returns the component table (the live array, not a copy). */
    String[][] GetComponents() {
        return Components;
    }

    /** Returns the number of Wylie symbols in the syllable. */
    int GetnComponents() {
        return nComponents;
    }

    /** Returns the number of Wylie symbols in the syllable. */
    int ItsLength() {
        return nComponents;
    }

    /** Returns the number of vowels in the syllable. */
    int GetnVowels() {
        return nVowels;
    }

    /** Returns true for a Tibetan syllable, false for a Sanskrit one. */
    boolean GetTibetanSyllableFlag() {
        return TibetanSyllableFlag;
    }

    /** Returns true if the component is a Tibetan vowel. */
    boolean IsTibetanVowel(String thecomponent) {
        return isOneOf(thecomponent, TIBETAN_VOWELS);
    }

    /** Returns true if the component can be a prefix. */
    boolean IsPrefix(String thecomponent) {
        return isOneOf(thecomponent, PREFIXES);
    }

    /** Returns true if the component can be a base letter. */
    boolean IsBaseLetter(String thecomponent) {
        return isOneOf(thecomponent, BASE_LETTERS);
    }

    /** Returns true if the component can be a superscript. */
    boolean IsSuperscript(String thecomponent) {
        return isOneOf(thecomponent, SUPERSCRIPTS);
    }

    /** Returns true if the component can be a subscript. */
    boolean IsSubscript(String thecomponent) {
        return isOneOf(thecomponent, SUBSCRIPTS);
    }

    /** Returns true if the component can be a suffix. */
    boolean IsSuffix(String thecomponent) {
        return isOneOf(thecomponent, SUFFIXES);
    }

    /** Returns true if the component can be a second suffix. */
    boolean IsSecondSuffix(String thecomponent) {
        return "s".equals(thecomponent);
    }

    /** Returns true if the component is a Sanskrit consonant. */
    boolean IsSanskritConsonant(String thecomponent) {
        return isOneOf(thecomponent, SANSKRIT_CONSONANTS);
    }

    /** Returns true if the component is a Sanskrit vowel. */
    boolean IsSanskritVowel(String thecomponent) {
        return isOneOf(thecomponent, SANSKRIT_VOWELS);
    }

    /** Returns true if the component is the Sanskrit stacking symbol "+". */
    boolean IsSanskritSpecialSymbol(String thecomponent) {
        return "+".equals(thecomponent);
    }

    /** Returns true if the component is a Tibetan vowel or base letter. */
    boolean IsThisTibetanSymbol(String thecomponent) {
        return IsTibetanVowel(thecomponent) || IsBaseLetter(thecomponent);
    }

    /** Returns true if the component is a Sanskrit vowel, consonant or special symbol. */
    boolean IsThisSanskritSymbol(String thecomponent) {
        return IsSanskritVowel(thecomponent)
                || IsSanskritConsonant(thecomponent)
                || IsSanskritSpecialSymbol(thecomponent);
    }

    /** Returns true if the component is a Tibetan or a Sanskrit symbol. */
    boolean IsThisTibetanOrSanskritSymbol(String thecomponent) {
        return IsThisTibetanSymbol(thecomponent) || IsThisSanskritSymbol(thecomponent);
    }

    /** Returns true if the component is a Tibetan or a Sanskrit vowel. */
    boolean IsThisTibetanOrSanskritVowel(String thecomponent) {
        return IsSanskritVowel(thecomponent) || IsTibetanVowel(thecomponent);
    }

    /**
     * Returns true if {@code prefix} is a prefix that {@code baseletter} can follow.
     */
    boolean PrefixBaseletterMatch(String prefix, String baseletter) {
        if (prefix == null || prefix.length() != 1) {
            return false; // No prefix.
        }
        switch (prefix.charAt(0)) {
        case 'g':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_G);
        case 'd':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_D);
        case 'b':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_B);
        case 'm':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_M);
        case '\'':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_A);
        default:
            return false;
        }
    }

    /**
     * Returns true if {@code subscript} is a subscript that {@code baseletter} can carry.
     */
    boolean BaseletterSubscriptMatch(String baseletter, String subscript) {
        if (subscript == null || subscript.length() != 1) {
            return false; // No subscript.
        }
        switch (subscript.charAt(0)) {
        case 'y':
            return isOneOf(baseletter, BASES_OVER_SUBSCRIPT_Y);
        case 'r':
            return isOneOf(baseletter, BASES_OVER_SUBSCRIPT_R);
        case 'l':
            return isOneOf(baseletter, BASES_OVER_SUBSCRIPT_L);
        case 'w':
            return isOneOf(baseletter, BASES_OVER_SUBSCRIPT_W);
        default:
            return false;
        }
    }

    /**
     * Returns true if {@code superscript} is a superscript that {@code baseletter} can follow.
     */
    boolean SuperscriptBaseletterMatch(String superscript, String baseletter) {
        if (superscript == null || superscript.length() != 1) {
            return false; // No superscript.
        }
        switch (superscript.charAt(0)) {
        case 'r':
            return isOneOf(baseletter, BASES_UNDER_SUPERSCRIPT_R);
        case 'l':
            return isOneOf(baseletter, BASES_UNDER_SUPERSCRIPT_L);
        case 's':
            return isOneOf(baseletter, BASES_UNDER_SUPERSCRIPT_S);
        default:
            return false;
        }
    }

    /**
     * Returns the collation rank of a Wylie symbol.
     *
     * @param theCharacter the symbol; {@code null} and "$" (absent) rank 0
     * @return the rank from the collation table, or 100 for "invalid" and for
     *         any symbol that has no entry
     */
    int ValueOfTibetanCharacter(String theCharacter) {
        if (theCharacter == null) {
            return 0;
        }
        for (int k = 0; k < RANKED_SYMBOLS.length; k++) {
            if (RANKED_SYMBOLS[k].equals(theCharacter)) {
                return SYMBOL_RANKS[k];
            }
        }
        return UNKNOWN_RANK;
    }

    /**
     * Parses {@link #theSyllable}: splits it into Wylie symbols and puts them
     * into {@link #Components} in the order in which syllables are compared.
     * Also sets {@link #nComponents}, {@link #nVowels} and
     * {@link #TibetanSyllableFlag}. On any parse failure the syllable is
     * marked invalid via {@link #InValidSyllable()}.
     */
    void ItsComponents() {
        Components = new String[MAX_VOWEL_GROUPS][SLOTS_PER_GROUP];
        nComponents = 0;
        if (theSyllable == null) {
            InValidSyllable();
            return;
        }

        String[] parts = splitIntoSymbols();
        if (parts == null) {
            InValidSyllable();
            return;
        }

        // leading[g] = number of non-vowel symbols directly before vowel g.
        // The last used entry counts the symbols after the final vowel.
        int[] leading = new int[nComponents + 1];
        int vowelCount = 0;
        boolean sanskrit = false;
        for (int k = 0; k < nComponents; k++) {
            String part = parts[k];
            if (IsTibetanVowel(part)) {
                vowelCount++;
            } else if (IsSanskritVowel(part)) {
                sanskrit = true;
                vowelCount++;
            } else {
                leading[vowelCount]++;
                if (IsThisSanskritSymbol(part)) {
                    sanskrit = true;
                }
            }
        }
        TibetanSyllableFlag = !sanskrit;

        // A syllable needs a vowel, and each vowel group needs its own row.
        if (vowelCount == 0 || vowelCount > MAX_VOWEL_GROUPS) {
            InValidSyllable();
            return;
        }
        nVowels = vowelCount;

        for (int g = 0; g < MAX_VOWEL_GROUPS; g++) {
            for (int k = 0; k < SLOTS_PER_GROUP; k++) {
                Components[g][k] = ABSENT;
            }
        }

        if (!sanskrit && vowelCount >= 3) {
            // A Tibetan syllable has no more than two vowels.
            InValidSyllable();
            return;
        }

        int start = 0; // Index in parts of the first symbol of the current group.
        for (int g = 0; g < vowelCount; g++) {
            boolean filled;
            if (sanskrit) {
                filled = fillSanskritGroup(g, parts, start, leading[g]);
            } else {
                // In a two-vowel syllable ("nga'i", "tshu'u") only a group without
                // consonants before its vowel records what follows the vowel.
                boolean takeSuffixes = (vowelCount == 1) || (leading[g] == 0);
                filled = fillTibetanGroup(g, parts, start, leading[g], takeSuffixes);
            }
            if (!filled) {
                InValidSyllable();
                return;
            }
            start += leading[g] + 1; // Skip this group's consonants and its vowel.
        }
    }

    /**
     * Marks the syllable as invalid so that it sorts after all valid ones:
     * every slot becomes "invalid", the vowel count becomes 1, and a message
     * is printed to standard output.
     */
    void InValidSyllable() {
        nVowels = 1;
        TibetanSyllableFlag = true;
        System.out.println("This is not a valid Tibetan syllable: " + theSyllable);
        for (int g = 0; g < Components.length; g++) {
            for (int k = 0; k < Components[g].length; k++) {
                Components[g][k] = INVALID;
            }
        }
    }

    /** Returns true if {@code candidate} equals one of {@code options}; false for null. */
    private static boolean isOneOf(String candidate, String[] options) {
        for (int k = 0; k < options.length; k++) {
            if (options[k].equals(candidate)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Drops leading and trailing spaces but always keeps the last remaining
     * character, so an all-space input becomes a single space. Null becomes "".
     */
    private static String stripSpaces(String s) {
        if (s == null) {
            return "";
        }
        int start = 0;
        int end = s.length();
        while (end - start > 1) {
            if (s.charAt(start) == ' ') {
                start++;
            } else if (s.charAt(end - 1) == ' ') {
                end--;
            } else {
                break;
            }
        }
        return s.substring(start, end);
    }

    /**
     * Cuts the syllable text into Wylie symbols by longest match, dropping the
     * Sanskrit stacking symbol "+". Increments {@link #nComponents} for each
     * stored symbol.
     *
     * @return the symbols in positions 0..nComponents-1, or null if some part
     *         of the text is not a known symbol
     */
    private String[] splitIntoSymbols() {
        int length = theSyllable.length();
        String[] symbols = new String[length];
        int pos = 0;
        while (pos < length) {
            int matched = longestSymbolAt(pos);
            if (matched == 0) {
                return null;
            }
            String symbol = theSyllable.substring(pos, pos + matched);
            // Always advance, also past "+": otherwise the scan never terminates.
            pos += matched;
            if (!"+".equals(symbol)) {
                symbols[nComponents++] = symbol;
            }
        }
        return symbols;
    }

    /** Returns the length of the longest symbol starting at pos, or 0 if there is none. */
    private int longestSymbolAt(int pos) {
        int longest = Math.min(MAX_SYMBOL_LENGTH, theSyllable.length() - pos);
        for (int len = longest; len >= 1; len--) {
            if (IsThisTibetanOrSanskritSymbol(theSyllable.substring(pos, pos + len))) {
                return len;
            }
        }
        return 0;
    }

    /** Returns parts[index], or "$" if the syllable has no symbol at that index. */
    private String componentAt(String[] parts, int index) {
        return (index >= 0 && index < nComponents) ? parts[index] : ABSENT;
    }

    /**
     * Fills one row of Components for a Tibetan vowel group.
     *
     * @param row          row of Components to fill
     * @param parts        all symbols of the syllable
     * @param start        index in parts of the group's first symbol
     * @param before       number of consonants before the group's vowel
     * @param takeSuffixes whether the two symbols after the vowel are stored as suffixes
     * @return false if the consonants cannot be arranged as prefix, superscript,
     *         base letter and subscript
     */
    private boolean fillTibetanGroup(int row, String[] parts, int start, int before, boolean takeSuffixes) {
        // Offsets from start of each role; -1 means the role is absent.
        int base = -1;
        int superscript = -1;
        int prefix = -1;
        int subscript = -1;

        switch (before) {
        case 0:
            // Bare vowel: the base letter is "a", the last letter of the alphabet.
            break;
        case 1:
            base = 0;
            break;
        case 2: {
            String first = parts[start];
            String second = parts[start + 1];
            if (PrefixBaseletterMatch(first, second)) {
                prefix = 0;
                base = 1;
            } else if (BaseletterSubscriptMatch(first, second)) {
                base = 0;
                subscript = 1;
            } else if (SuperscriptBaseletterMatch(first, second)) {
                superscript = 0;
                base = 1;
            } else {
                return false;
            }
            break;
        }
        case 3: {
            String first = parts[start];
            String second = parts[start + 1];
            String third = parts[start + 2];
            if (PrefixBaseletterMatch(first, third) && SuperscriptBaseletterMatch(second, third)) {
                prefix = 0;
                superscript = 1;
                base = 2;
            } else if (PrefixBaseletterMatch(first, second) && BaseletterSubscriptMatch(second, third)) {
                prefix = 0;
                base = 1;
                subscript = 2;
            } else if (SuperscriptBaseletterMatch(first, second) && BaseletterSubscriptMatch(second, third)) {
                superscript = 0;
                base = 1;
                subscript = 2;
            } else if ("b".equals(first)) {
                // For special cases of "brja", "bsnya", "brla", "bsna", ...
                if (SuperscriptBaseletterMatch(second, third)) {
                    prefix = 0;
                    superscript = 1;
                    base = 2;
                } else if (BaseletterSubscriptMatch(second, third)) {
                    prefix = 0;
                    base = 1;
                    subscript = 2;
                } else {
                    return false;
                }
            } else {
                return false;
            }
            break;
        }
        case 4:
            // Prefix, superscript, base letter, subscript; accepted without checking the pairs.
            prefix = 0;
            superscript = 1;
            base = 2;
            subscript = 3;
            break;
        default:
            // More consonants than a Tibetan stack can hold.
            return false;
        }

        int vowelAt = start + before;
        String[] slots = Components[row];
        slots[BASE] = (before == 0) ? "a" : parts[start + base];
        slots[SUPERSCRIPT] = (superscript < 0) ? ABSENT : parts[start + superscript];
        slots[PREFIX] = (prefix < 0) ? ABSENT : parts[start + prefix];
        slots[SUBSCRIPT] = (subscript < 0) ? ABSENT : parts[start + subscript];
        slots[VOWEL] = parts[vowelAt];
        slots[SUFFIX] = takeSuffixes ? componentAt(parts, vowelAt + 1) : ABSENT;
        slots[SECOND_SUFFIX] = takeSuffixes ? componentAt(parts, vowelAt + 2) : ABSENT;
        return true;
    }

    /**
     * Fills one row of Components for a Sanskrit vowel group: the first symbol
     * of the group goes to the base slot (the vowel itself when no consonant
     * precedes it), and all consonants before the vowel are listed from the
     * suffix slot onwards.
     *
     * @param row    row of Components to fill
     * @param parts  all symbols of the syllable
     * @param start  index in parts of the group's first symbol
     * @param before number of consonants before the group's vowel
     * @return false if the consonants do not fit into the row
     */
    private boolean fillSanskritGroup(int row, String[] parts, int start, int before) {
        if (SUFFIX + before > SLOTS_PER_GROUP) {
            return false;
        }
        String[] slots = Components[row];
        slots[BASE] = parts[start];
        slots[SUPERSCRIPT] = ABSENT;
        slots[PREFIX] = ABSENT;
        slots[SUBSCRIPT] = ABSENT;
        slots[VOWEL] = parts[start + before];
        for (int k = 0; k < before; k++) {
            slots[SUFFIX + k] = parts[start + k];
        }
        return true;
    }
} // End of the class