Jskad/source/org/thdl/tib/text/TibetanSyllable.java
2005-03-08 07:59:05 +00:00

1118 lines
No EOL
33 KiB
Java

/*
* The contents of this file are subject to the THDL Open Community License
* Version 1.0 (the "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License on the THDL web site
* (http://www.thdl.org/).
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
* the specific terms governing rights and limitations under the License.
*
* The Initial Developer of this software is the Tibetan and Himalayan Digital
* Library (THDL).
*
* Copyright 2001 Tashi Tsering All Rights Reserved
*
* Contributor(s): Andres Montano.
*/
package org.thdl.tib.text;
/**
 * A single syllable written in the Wylie transliteration system, parsed into
 * its components so that syllables (and therefore words, phrases and
 * sentences built from them) can be sorted. The relative order of the
 * consonants and vowels is the one encoded in the sort table of this class;
 * according to the original author it follows the "tshig mdzod chen mo"
 * (The Big Dictionary).
 *
 * <p>Usage:
 * <ul>
 * <li>{@link #TibetanSyllable(String)} parses the text. Text that cannot be
 *     parsed is rejected with a <code>RuntimeException</code>, so an invalid
 *     syllable never exists as an object.</li>
 * <li>{@link #compareTo(Object)} returns 0 when both syllables have the same
 *     sort key, 1 when this syllable goes after the argument and -1 when it
 *     goes before it.</li>
 * </ul>
 *
 * <p>A syllable is treated as Sanskrit as soon as it contains one of the
 * symbols accepted by {@link #IsSanskritVowel(String)} or
 * {@link #IsSanskritConsonant(String)}; otherwise it is treated as Tibetan
 * and may contain at most two vowels (as in "nga'i").
 */
public class TibetanSyllable implements Comparable
{
    /** Marker stored in a slot whose component is not present; sorts first. */
    private static final String ABSENT = "$";
    /** Marker written into every slot just before a syllable is rejected. */
    private static final String INVALID = "invalid";
    /** Rank returned for {@link #INVALID} and for any symbol without a rank. */
    private static final int UNRANKED = 100;

    /** Number of rows in the component table, i.e. the most vowels handled. */
    private static final int MAX_GROUPS = 10;
    /** Number of slots in each row of the component table. */
    private static final int GROUP_WIDTH = 20;
    /** Number of leading slots of each row that take part in comparisons. */
    private static final int COMPARED_SLOTS = 10;
    /** Length of the longest transliteration symbol ("tsh"). */
    private static final int MAX_SYMBOL_LENGTH = 3;

    // Slot layout of one row of the component table.
    private static final int BASE = 0;
    private static final int SUPERSCRIPT = 1;
    private static final int PREFIX = 2;
    private static final int SUBSCRIPT = 3;
    private static final int VOWEL = 4;
    private static final int SUFFIX = 5;
    private static final int SECOND_SUFFIX = 6;
    /** First slot used for the consonant cluster of a Sanskrit vowel group. */
    private static final int CLUSTER_START = 5;

    private static final String[] TIBETAN_VOWELS = { "a", "i", "u", "e", "o" };
    private static final String[] PREFIXES = { "g", "d", "b", "m", "'" };
    // ".y" is the base letter "y" written after a prefix, so that "g.ya"
    // (prefix g + base y) differs from "gya" (base g + subscript y).
    private static final String[] BASE_LETTERS = {
        "k", "kh", "g", "ng", "c", "ch", "j", "ny", "t", "th", "d", "n",
        "p", "ph", "b", "m", "ts", "tsh", "dz", "w", "zh", "z", "'", "y",
        ".y", "r", "l", "sh", "s", "h", "a" };
    private static final String[] SUPERSCRIPTS = { "r", "l", "s" };
    private static final String[] SUBSCRIPTS = { "w", "y", "r", "l" };
    private static final String[] SUFFIXES = {
        "g", "ng", "d", "n", "b", "m", "'", "r", "l", "s" };
    private static final String[] SANSKRIT_CONSONANTS = {
        "T", "Th", "D", "N", "Sh", "M", "`", "f", "v" };
    private static final String[] SANSKRIT_VOWELS = {
        "A", "I", "U", "-i", "-I", "ai", "au" };

    // Base letters accepted after each prefix.
    private static final String[] BASES_AFTER_PREFIX_G = {
        "c", "ny", "t", "d", "n", "ts", "zh", "z", "sh", "s", ".y" };
    private static final String[] BASES_AFTER_PREFIX_D = {
        "k", "p", "g", "b", "ng", "m" };
    private static final String[] BASES_AFTER_PREFIX_B = {
        "c", "g", "t", "d", "ts", "zh", "z", "sh", "s", "k" };
    private static final String[] BASES_AFTER_PREFIX_M = {
        "kh", "ch", "th", "tsh", "g", "j", "d", "dz", "ng", "ny", "n" };
    private static final String[] BASES_AFTER_PREFIX_ACHUNG = {
        "kh", "ch", "th", "ph", "tsh", "g", "j", "d", "b", "dz" };

    // Base letters that accept each subscript.
    private static final String[] BASES_WITH_SUBSCRIPT_Y = {
        "k", "kh", "g", "p", "ph", "b", "m" };
    private static final String[] BASES_WITH_SUBSCRIPT_R = {
        "k", "t", "p", "kh", "ph", "g", "d", "b", "h", "m", "s" };
    private static final String[] BASES_WITH_SUBSCRIPT_L = {
        "k", "g", "b", "r", "s", "z" };
    private static final String[] BASES_WITH_SUBSCRIPT_W = {
        "k", "kh", "g", "ny", "d", "ch", "zh", "z", "r", "l", "sh", "s", "h" };

    // Base letters accepted under each superscript.
    private static final String[] BASES_UNDER_SUPERSCRIPT_R = {
        "k", "t", "ts", "g", "j", "d", "b", "dz", "ng", "ny", "n", "m" };
    private static final String[] BASES_UNDER_SUPERSCRIPT_L = {
        "k", "c", "t", "p", "g", "j", "d", "b", "ng", "h" };
    private static final String[] BASES_UNDER_SUPERSCRIPT_S = {
        "k", "t", "p", "ts", "g", "d", "b", "ng", "ny", "n", "m" };

    // Sort table: SORT_RANKS[i] is the rank of SORT_SYMBOLS[i]. "y" and ".y"
    // deliberately share a rank; ranks 36-41 are unused.
    private static final String[] SORT_SYMBOLS = {
        "k", "kh", "g", "ng", "c", "ch", "j", "ny",
        "T", "Th", "D", "N", "t", "th", "d", "n",
        "p", "ph", "b", "m", "ts", "tsh", "dz", "w",
        "zh", "z", "'", "y", ".y", "r", "l", "sh",
        "Sh", "s", "h", "a",
        "A", "i", "I", "-i", "-I", "u", "U", "e", "ai", "o", "au" };
    private static final int[] SORT_RANKS = {
        1, 2, 3, 4, 5, 6, 7, 8,
        9, 10, 11, 12, 13, 14, 15, 16,
        17, 18, 19, 20, 21, 22, 23, 24,
        25, 26, 27, 28, 28, 29, 30, 31,
        32, 33, 34, 35,
        42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52 };

    /** The character String of the syllable. */
    private String theSyllable;
    /** True for Tibetan syllable, false for Sanskrit syllable. */
    private boolean TibetanSyllableFlag;
    /** Number of Wylie symbols in the syllable (the stacking sign "+" is
     * not counted). */
    private int nComponents;
    /** The number of vowels in the syllable. */
    private int nVowels;
    /**
     * The sort key: one row per vowel of the syllable.
     * For a Tibetan syllable a row holds
     * [0] base letter, [1] superscript, [2] prefix, [3] subscript,
     * [4] vowel, [5] suffix, [6] second suffix
     * (suffixes are only stored in the row of the last vowel).
     * For a Sanskrit syllable a row holds
     * [0] first symbol of the group, [4] vowel, [5...] the consonants that
     * precede the vowel.
     * Unused slots hold "$".
     */
    private String[][] Components;

    /**
     * Parses a syllable.
     *
     * @param s the Wylie text of the syllable; leading and trailing
     *          whitespace is ignored
     * @throws NullPointerException if <code>s</code> is null
     * @throws RuntimeException if the text cannot be parsed as a syllable
     */
    public TibetanSyllable(String s) {
        theSyllable = s.trim();
        ItsComponents();
    }

    /**
     * Compares the sort keys of two syllables, vowel group by vowel group
     * and slot by slot. Only the first ten slots of each group are compared.
     *
     * @param o the other <code>TibetanSyllable</code>
     * @return 1 if this syllable goes after <code>o</code>, -1 if it goes
     *         before it, 0 if both have the same sort key
     * @throws ClassCastException if <code>o</code> is not a TibetanSyllable
     * @throws NullPointerException if <code>o</code> is null
     */
    public int compareTo(Object o)
    {
        TibetanSyllable that = (TibetanSyllable) o;
        int groups = Math.max(nVowels, that.nVowels);
        for (int g = 0; g < groups; g++) {
            for (int slot = 0; slot < COMPARED_SLOTS; slot++) {
                int mine = ValueOfTibetanCharacter(Components[g][slot]);
                int theirs = ValueOfTibetanCharacter(that.Components[g][slot]);
                if (mine != theirs)
                    return (mine > theirs) ? 1 : -1;
            }
        }
        return 0;
    }

    /** Return the base letter of (the first vowel group of) a syllable. */
    public String BaseLetter() {
        return Components[0][BASE];
    }

    /** Return true for a Tibetan syllable, false for a Sanskrit one. */
    public boolean IsTibetanSyllable() {
        return TibetanSyllableFlag;
    }

    /** Return the (trimmed) text of the syllable. */
    public String GetTheSyllable() {
        return theSyllable;
    }

    /** Return the (trimmed) text of the syllable. */
    public String toString()
    {
        return theSyllable;
    }

    /**
     * Replaces the stored text only. The text is neither trimmed nor parsed
     * again, so {@link #compareTo(Object)}, {@link #BaseLetter()} and
     * {@link #IsTibetanSyllable()} keep answering for the text given to the
     * constructor.
     */
    public void SetTheSyllable(String s) {
        theSyllable = s;
    }

    /** Return true if the component is a Tibetan vowel. */
    public static boolean IsTibetanVowel(String thecomponent) {
        return isOneOf(thecomponent, TIBETAN_VOWELS);
    }

    /** Return true if the component can be a prefix. */
    public static boolean IsPrefix(String thecomponent) {
        return isOneOf(thecomponent, PREFIXES);
    }

    /** Return true if the component can be a base letter. */
    public static boolean IsBaseLetter(String thecomponent) {
        return isOneOf(thecomponent, BASE_LETTERS);
    }

    /** Return true if the component can be a superscript. */
    public static boolean IsSuperscript(String thecomponent) {
        return isOneOf(thecomponent, SUPERSCRIPTS);
    }

    /** Return true if the component can be a subscript. */
    public static boolean IsSubscript(String thecomponent) {
        return isOneOf(thecomponent, SUBSCRIPTS);
    }

    /** Return true if the component can be a suffix. */
    public static boolean IsSuffix(String thecomponent) {
        return isOneOf(thecomponent, SUFFIXES);
    }

    /** Return true if the component can be a second suffix. */
    public static boolean IsSecondSuffix(String thecomponent) {
        return "s".equals(thecomponent);
    }

    /** Return true if the component is a Sanskrit consonant. */
    public static boolean IsSanskritConsonant(String thecomponent) {
        return isOneOf(thecomponent, SANSKRIT_CONSONANTS);
    }

    /** Return true if the component is a Sanskrit vowel. */
    public static boolean IsSanskritVowel(String thecomponent) {
        return isOneOf(thecomponent, SANSKRIT_VOWELS);
    }

    /** Return true if the component is the Sanskrit stacking sign "+". */
    public static boolean IsSanskritSpecialSymbol(String thecomponent) {
        return "+".equals(thecomponent);
    }

    /** Return true if the component is a Tibetan vowel or base letter. */
    public static boolean IsThisTibetanSymbol(String thecomponent) {
        return IsTibetanVowel(thecomponent) || IsBaseLetter(thecomponent);
    }

    /** Return true if the component is a Sanskrit vowel, a Sanskrit
     * consonant or the stacking sign. */
    public static boolean IsThisSanskritSymbol(String thecomponent) {
        return IsSanskritVowel(thecomponent)
                || IsSanskritConsonant(thecomponent)
                || IsSanskritSpecialSymbol(thecomponent);
    }

    /** Return true if the component is a Tibetan or a Sanskrit symbol. */
    public static boolean IsThisTibetanOrSanskritSymbol(String thecomponent) {
        return IsThisTibetanSymbol(thecomponent)
                || IsThisSanskritSymbol(thecomponent);
    }

    /** Return true if the component is a Tibetan or a Sanskrit vowel. */
    public static boolean IsThisTibetanOrSanskritVowel(String thecomponent) {
        return IsSanskritVowel(thecomponent) || IsTibetanVowel(thecomponent);
    }

    /**
     * Return true if <code>prefix</code> is a prefix and
     * <code>baseletter</code> is a base letter that can follow it.
     */
    public static boolean PrefixBaseletterMatch(String prefix, String baseletter) {
        if (prefix == null || prefix.length() != 1)
            return false; // No prefix.
        switch (prefix.charAt(0)) {
        case 'g':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_G);
        case 'd':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_D);
        case 'b':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_B);
        case 'm':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_M);
        case '\'':
            return isOneOf(baseletter, BASES_AFTER_PREFIX_ACHUNG);
        default:
            return false;
        }
    }

    /**
     * Return true if <code>subscript</code> is a subscript and
     * <code>baseletter</code> is a base letter that can carry it.
     */
    public static boolean BaseletterSubscriptMatch(String baseletter, String subscript) {
        if (subscript == null || subscript.length() != 1)
            return false; // No subscript.
        switch (subscript.charAt(0)) {
        case 'y':
            return isOneOf(baseletter, BASES_WITH_SUBSCRIPT_Y);
        case 'r':
            return isOneOf(baseletter, BASES_WITH_SUBSCRIPT_R);
        case 'l':
            return isOneOf(baseletter, BASES_WITH_SUBSCRIPT_L);
        case 'w':
            return isOneOf(baseletter, BASES_WITH_SUBSCRIPT_W);
        default:
            return false;
        }
    }

    /**
     * Return true if <code>superscript</code> is a superscript and
     * <code>baseletter</code> is a base letter that can stand under it.
     */
    public static boolean SuperscriptBaseletterMatch(String superscript, String baseletter) {
        if (superscript == null || superscript.length() != 1)
            return false; // No superscript.
        switch (superscript.charAt(0)) {
        case 'r':
            return isOneOf(baseletter, BASES_UNDER_SUPERSCRIPT_R);
        case 'l':
            return isOneOf(baseletter, BASES_UNDER_SUPERSCRIPT_L);
        case 's':
            return isOneOf(baseletter, BASES_UNDER_SUPERSCRIPT_S);
        default:
            return false;
        }
    }

    /** Return true if <code>candidate</code> equals one of the options;
     * a null candidate matches nothing. */
    private static boolean isOneOf(String candidate, String[] options) {
        for (int i = 0; i < options.length; i++)
            if (options[i].equals(candidate))
                return true;
        return false;
    }

    /**
     * Return the sort rank of a component: 0 for an absent component (null
     * or "$"), the rank from the sort table for a ranked symbol, and 100 for
     * anything else.
     */
    private static int ValueOfTibetanCharacter(String theCharacter) {
        if (theCharacter == null || theCharacter.equals(ABSENT))
            return 0; // For non-presence.
        for (int i = 0; i < SORT_SYMBOLS.length; i++)
            if (SORT_SYMBOLS[i].equals(theCharacter))
                return SORT_RANKS[i];
        return UNRANKED;
    }

    /**
     * This is the key function in the class: it cuts the Wylie string into
     * symbols, groups them by vowel and stores every group in the component
     * table in the order in which syllables are compared.
     *
     * @throws RuntimeException if the string is not a parsable syllable
     */
    private void ItsComponents() {
        Components = new String[MAX_GROUPS][GROUP_WIDTH];
        String[] tokens = tokenize();

        // consonantsBefore[g] counts the non-vowel symbols directly before
        // vowel g; the entry after the last vowel counts trailing symbols.
        int[] consonantsBefore = new int[nComponents + 1];
        int vowelCount = 0;
        boolean sanskrit = false;
        for (int i = 0; i < nComponents; i++) {
            String token = tokens[i];
            if (IsTibetanVowel(token)) {
                vowelCount++;
            } else if (IsSanskritVowel(token)) {
                sanskrit = true;
                vowelCount++;
            } else {
                consonantsBefore[vowelCount]++;
                if (IsThisSanskritSymbol(token))
                    sanskrit = true;
            }
        }
        TibetanSyllableFlag = !sanskrit;

        // A syllable needs a vowel, and every vowel needs its own row.
        if (vowelCount == 0 || vowelCount > MAX_GROUPS) {
            InValidSyllable();
            return;
        }
        nVowels = vowelCount;
        for (int g = 0; g < MAX_GROUPS; g++)
            for (int slot = 0; slot < GROUP_WIDTH; slot++)
                Components[g][slot] = ABSENT;

        int start = 0; // Index of the first symbol of the current group.
        if (!sanskrit) {
            // A Tibetan syllable has no more than 2 vowels ("nga'i").
            if (vowelCount > 2) {
                InValidSyllable();
                return;
            }
            for (int g = 0; g < vowelCount; g++) {
                fillTibetanGroup(Components[g], tokens, start, consonantsBefore[g]);
                start += consonantsBefore[g] + 1;
            }
            // Whatever follows the last vowel is the suffix and the second
            // suffix; further trailing symbols are not part of the key.
            String[] last = Components[vowelCount - 1];
            last[SUFFIX] = tokenOrAbsent(tokens, start);
            last[SECOND_SUFFIX] = tokenOrAbsent(tokens, start + 1);
        } else {
            for (int g = 0; g < vowelCount; g++) {
                int consonants = consonantsBefore[g];
                if (consonants > GROUP_WIDTH - CLUSTER_START) {
                    InValidSyllable();
                    return;
                }
                String[] row = Components[g];
                // With no consonant the vowel itself takes the base slot.
                row[BASE] = tokens[start];
                row[VOWEL] = tokens[start + consonants];
                for (int i = 0; i < consonants; i++)
                    row[CLUSTER_START + i] = tokens[start + i];
                start += consonants + 1;
            }
        }
    }

    /**
     * Cuts the syllable into Wylie symbols, always taking the longest symbol
     * that matches at the current position, and records their number in
     * <code>nComponents</code>. The stacking sign "+" is recognised but not
     * kept.
     *
     * @return an array whose first <code>nComponents</code> entries are the
     *         symbols of the syllable
     * @throws RuntimeException if some position starts no known symbol
     */
    private String[] tokenize() {
        int length = theSyllable.length();
        String[] tokens = new String[length]; // Every symbol is >= 1 char.
        nComponents = 0;
        int pos = 0;
        while (pos < length) {
            int size = Math.min(MAX_SYMBOL_LENGTH, length - pos);
            while (size > 0 && !IsThisTibetanOrSanskritSymbol(
                    theSyllable.substring(pos, pos + size)))
                size--;
            if (size == 0) {
                InValidSyllable();
                return tokens;
            }
            String symbol = theSyllable.substring(pos, pos + size);
            if (!IsSanskritSpecialSymbol(symbol))
                tokens[nComponents++] = symbol;
            pos += size;
        }
        return tokens;
    }

    /** Return the symbol at <code>index</code>, or "$" past the last symbol. */
    private String tokenOrAbsent(String[] tokens, int index) {
        return (index < nComponents) ? tokens[index] : ABSENT;
    }

    /**
     * Stores the consonant stack and the vowel of one Tibetan vowel group.
     *
     * @param row        the row of the component table to fill
     * @param tokens     the symbols of the syllable
     * @param start      index of the first symbol of the group
     * @param consonants number of symbols before the vowel of the group
     * @throws RuntimeException if the symbols before the vowel do not form
     *         an accepted stack
     */
    private void fillTibetanGroup(String[] row, String[] tokens, int start, int consonants) {
        switch (consonants) {
        case 0:
            // Special case for "a", the last letter in the Tibetan letter
            // list, and the sequences led by its "i", "o", "u" and "e".
            row[BASE] = "a";
            break;
        case 1:
            row[BASE] = tokens[start];
            break;
        case 2:
            fillTwoConsonantStack(row, tokens[start], tokens[start + 1]);
            break;
        case 3:
            fillThreeConsonantStack(row, tokens[start], tokens[start + 1],
                    tokens[start + 2]);
            break;
        case 4:
            // Four symbols are taken as prefix, superscript, base letter
            // and subscript without further checks.
            setStack(row, tokens[start], tokens[start + 1],
                    tokens[start + 2], tokens[start + 3]);
            break;
        default:
            InValidSyllable();
            return;
        }
        row[VOWEL] = tokens[start + consonants];
    }

    /** Reads two symbols as prefix + base, base + subscript or
     * superscript + base, in that order of preference. */
    private void fillTwoConsonantStack(String[] row, String first, String second) {
        if (PrefixBaseletterMatch(first, second))
            setStack(row, first, ABSENT, second, ABSENT);
        else if (BaseletterSubscriptMatch(first, second))
            setStack(row, ABSENT, ABSENT, first, second);
        else if (SuperscriptBaseletterMatch(first, second))
            setStack(row, ABSENT, first, second, ABSENT);
        else
            InValidSyllable();
    }

    /** Reads three symbols as prefix + superscript + base,
     * prefix + base + subscript or superscript + base + subscript, in that
     * order of preference. */
    private void fillThreeConsonantStack(String[] row, String first, String second, String third) {
        boolean superscriptOverThird = SuperscriptBaseletterMatch(second, third);
        boolean subscriptUnderSecond = BaseletterSubscriptMatch(second, third);
        if (PrefixBaseletterMatch(first, third) && superscriptOverThird)
            setStack(row, first, second, third, ABSENT);
        else if (PrefixBaseletterMatch(first, second) && subscriptUnderSecond)
            setStack(row, first, ABSENT, second, third);
        else if (SuperscriptBaseletterMatch(first, second) && subscriptUnderSecond)
            setStack(row, ABSENT, first, second, third);
        else if ("b".equals(first)) {
            // For special cases of "brja", "bsnya", "brla", "bsna", ...:
            // the prefix "b" is accepted without a prefix/base check.
            if (superscriptOverThird)
                setStack(row, first, second, third, ABSENT);
            else if (subscriptUnderSecond)
                setStack(row, first, ABSENT, second, third);
            else
                InValidSyllable();
        } else
            InValidSyllable();
    }

    /** Writes the four stack slots of a row. */
    private static void setStack(String[] row, String prefix, String superscript, String base, String subscript) {
        row[BASE] = base;
        row[SUPERSCRIPT] = superscript;
        row[PREFIX] = prefix;
        row[SUBSCRIPT] = subscript;
    }

    /**
     * Rejects the syllable: marks every slot of the component table as
     * invalid and throws.
     *
     * @throws RuntimeException always
     */
    private void InValidSyllable() throws RuntimeException {
        nVowels = 1;
        TibetanSyllableFlag = true;
        for (int g = 0; g < MAX_GROUPS; g++)
            for (int slot = 0; slot < GROUP_WIDTH; slot++)
                Components[g][slot] = INVALID;
        throw new RuntimeException("This is not a valid Tibetan syllable: "
                + theSyllable);
    }
} //End of the class