diff --git a/source/org/thdl/tib/text/TibetanSyllable.java b/source/org/thdl/tib/text/TibetanSyllable.java new file mode 100644 index 0000000..d808344 --- /dev/null +++ b/source/org/thdl/tib/text/TibetanSyllable.java @@ -0,0 +1,1074 @@ +/* +The contents of this file are subject to the THDL Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the THDL web site +(http://www.thdl.org/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is the Tibetan and Himalayan Digital +Library (THDL). + +Copyright 2001 Tashi Tsering +All Rights Reserved + +Contributor(s): ______________________________________. +*/ +/* +//************************************************************************** +// +// This is the Java version of the TibetanSyllable class, in which a syllable +// is represented in the Wylie transliteration system. Using this class, Tibetan +// syllables can be compared, and Tibetan syllables, words, phrases and +// sentences can be sorted by comparing their syllables pairwise. The order of the +// consonants and the order of the vowels are based on "tshig mdzod chen mo" +// (The Big Dictionary). +// +// +// Author: Tashi Tsering +// Date: 11/27/2002 +// Written at: University of Virginia +// +//*********************************************************************************** +//The interface of the class of TibetanSyllable: +// +//class name: TibetanSyllable +// +//constructor: TibetanSyllable ( String S ) +// String S is the representation of a Tibetan syllable by string. +// +//method: +// int CompareWith( TibetanSyllable thatSyllable ) +// +// return: 0 if this syllable is the same as thatSyllable; +// 1 if this syllable is bigger than thatSyllable, +// i.e. 
this syllable goes after thatSyllable in the order of a dictionary; +// -1 if this syllable is less than thatSyllable, +// i.e. this syllable goes before thatSyllable in the order of a dictionary; +// Those invalid syllables will be treated as the biggest syllable, that they are always +// bigger than valid syllables.; +//************************************************************************************ +// +*/ + + +package org.thdl.tib.text; + + +class TibetanSyllable { + + + String theSyllable; // The character String of the syllable. + boolean TibetanSyllableFlag; //True for Tibetan syllable, false for Sanskrit syllable. + int nComponents; // Number of Tibetan characters represented by Wylie system in the syllable. + int nVowels; // The number of vowels in the syllable. + String [][] Components; // Components of a syllable consists of Tibetan Wylie "characters". + // Components[0][0]-----Base letter + // Components[0][1]-----Superscript + // Components[0][2]-----Prefix + // Components[0][3]-----Subscript + // Components[0][4]-----vowel + // Components[0][5]-----Suffix + // Components[0][6]-----Second suffix + //You can add your own method to return different component of a syllable. + +// The constructor +TibetanSyllable ( String s ){ + //Filter the spaces that are at the beginning or end of the syllable. + while (s.length()>0) if(s.charAt(0) == ' ' && s.length()>1) s = s.substring(1,s.length()); + else if(s.charAt(s.length()-1) == ' ' && s.length()>1) s = s.substring(0, s.length()-1); + else break; + theSyllable = s; + ItsComponents(); +} + + + + +int CompareWith( TibetanSyllable s ){ + + int n = ( nVowels > s.GetnVowels()) ? nVowels : s.GetnVowels(); + + String [][] temp; + + temp = s.GetComponents(); + + for (int i=0; i ValueOfTibetanCharacter(temp[i][j])) + return 1; // This syllable is bigger than that syllable s. 
+ + else if(ValueOfTibetanCharacter(Components[i][j]) < ValueOfTibetanCharacter(temp[i][j])) + return -1; // This syllable is smaller than that syllable s. + + else; + } + } + + return 0; // They are the same syllable. +} + + +//Return the base letter of a syllable +String BaseLetter(){ + return Components[0][0]; +} + + +boolean IsTibetanSyllable(){ + return TibetanSyllableFlag; +} + + + +String GetTheSyllable(){ + return theSyllable; +} + + + +void SetTheSyllable( String s ){ + theSyllable = s; +} + + +String [][] GetComponents( ){ + return Components; +} + + + +int GetnComponents( ){ + return nComponents; +} + + + +int ItsLength( ){ + return nComponents; +} + + + +int GetnVowels( ){ + return nVowels; +} + + + +boolean GetTibetanSyllableFlag( ){ + return TibetanSyllableFlag; +} + + + +//To examine a component to see if it is a vowel. Return true if the component is vowel. +boolean IsTibetanVowel(String thecomponent) +{ + + if( (thecomponent.equals("a")) || + (thecomponent.equals("i")) || + (thecomponent.equals("u")) || + (thecomponent.equals("e")) || + (thecomponent.equals("o")) ) + return true; + else return false; +} + + +//To examine a component, see if it is a prefix. Return true if the component is prefix. +boolean IsPrefix(String thecomponent) +{ + + if( (thecomponent.equals("g")) || + (thecomponent.equals("d")) || + (thecomponent.equals("b")) || + (thecomponent.equals("m")) || + (thecomponent.equals("'")) ) + return true; + else return false; + +} + + +//To examine a component, see if it is a base letter. Return true if it is. 
+boolean IsBaseLetter(String thecomponent) +{ + + if( (thecomponent.equals("k")) || + (thecomponent.equals("kh")) || + (thecomponent.equals("g")) || + (thecomponent.equals("ng")) || + (thecomponent.equals("c")) || + (thecomponent.equals("ch")) || + (thecomponent.equals("j")) || + (thecomponent.equals("ny")) || + (thecomponent.equals("t")) || + (thecomponent.equals("th")) || + (thecomponent.equals("d")) || + (thecomponent.equals("n")) || + (thecomponent.equals("p")) || + (thecomponent.equals("ph")) || + (thecomponent.equals("b")) || + (thecomponent.equals("m")) || + (thecomponent.equals("ts")) || + (thecomponent.equals("tsh")) || + (thecomponent.equals("dz")) || + (thecomponent.equals("w")) || + (thecomponent.equals("zh")) || + (thecomponent.equals("z")) || + (thecomponent.equals("'")) || + (thecomponent.equals("y")) || + (thecomponent.equals(".y")) || //Special for making "g.ya" different from gya. + (thecomponent.equals("r")) || + (thecomponent.equals("l")) || + (thecomponent.equals("sh")) || + (thecomponent.equals("s")) || + (thecomponent.equals("h")) || + (thecomponent.equals("a")) ) + return true; + else return false; +} + + +//To examine a component, see if it is a supperscript. Return true if it is, otherwise false. +boolean IsSuperscript(String thecomponent) +{ + + if( (thecomponent.equals("r")) || + (thecomponent.equals("l")) || + (thecomponent.equals("s")) ) + return true; + else return false; +} + + +//To examine a component, see if it is a subscript. Return true if it is, otherwise false. +boolean IsSubscript(String thecomponent) +{ + + if( (thecomponent.equals("w")) || + (thecomponent.equals("y")) || + (thecomponent.equals("r")) || + (thecomponent.equals("l")) ) + return true; + else return false; +} + + +//To examine a component, see if it is a suffix. Return true if it is, otherwise false. 
+boolean IsSuffix(String thecomponent) +{ + + if( (thecomponent.equals("g")) || + (thecomponent.equals("ng")) || + (thecomponent.equals("d")) || + (thecomponent.equals("n")) || + (thecomponent.equals("b")) || + (thecomponent.equals("m")) || + (thecomponent.equals("'")) || + (thecomponent.equals("r")) || + (thecomponent.equals("l")) || + (thecomponent.equals("s")) ) + return true; + else return false; +} + + +//To examine a component, see if it is a the second suffix. Return true if it is, otherwise false. +boolean IsSecondSuffix(String thecomponent) +{ + if(thecomponent.equals("s")) + return true; + else return false; +} + + +//To examine a component, see if it is a Sanskrit consonant. Return true if it is, otherwise false. +boolean IsSanskritConsonant(String thecomponent) +{ + + if( (thecomponent.equals("T")) || + (thecomponent.equals("Th")) || + (thecomponent.equals("D")) || + (thecomponent.equals("N")) || + (thecomponent.equals("Sh")) || + (thecomponent.equals("M")) || + (thecomponent.equals("`")) || + (thecomponent.equals("f")) || + (thecomponent.equals("v"))) + return true; + else return false; +} + + +//To examine a component, see if it is a Sanskrit vowel. Return true if it is, otherwise false. +boolean IsSanskritVowel(String thecomponent) +{ + + if( (thecomponent.equals("A")) || + (thecomponent.equals("I")) || + (thecomponent.equals("U")) || + (thecomponent.equals("-i"))|| + (thecomponent.equals("-I"))|| + (thecomponent.equals("ai"))|| + (thecomponent.equals("au"))) + return true; + else return false; +} + + +//To examine a component, see if it is a Sanskrit symbole. Return true if it is, otherwise false. +boolean IsSanskritSpecialSymbol(String thecomponent) +{ + if( (thecomponent.equals("+")) ) + return true; + else return false; +} + + +//To examine a component, see if it is a Tibetan symbol. Return true if it is, otherwise false. 
+boolean IsThisTibetanSymbol(String thecomponent) +{ + if( IsTibetanVowel(thecomponent) || + IsBaseLetter(thecomponent) ) + return true; + else return false; +} + + +//To examine a component, see if it is a Sanskrit symbol. Return true if it is, otherwise false. +boolean IsThisSanskritSymbol(String thecomponent) +{ + if( IsSanskritVowel(thecomponent) || + IsSanskritConsonant( thecomponent) || + IsSanskritSpecialSymbol(thecomponent) ) + return true; + else return false; +} + + +//To examine a component, see if it is a Tibetan symbol. Return true if it is, otherwise false. +boolean IsThisTibetanOrSanskritSymbol(String thecomponent) +{ + if( IsThisTibetanSymbol(thecomponent) || + IsThisSanskritSymbol(thecomponent) ) + return true; + else return false; +} + + +//To examine a component, see if it is a Tibetan or Sanskrit symbol. Return true if it is, otherwise false. +boolean IsThisTibetanOrSanskritVowel(String thecomponent) +{ + if( IsSanskritVowel( thecomponent) || + IsTibetanVowel( thecomponent) ) + return true; + else return false; +} + + +//To examine a pair of components, see if one of them is a prefix and the other one is +//a base letter that can follow the prefix. Return true if it is, otherwise false. +boolean PrefixBaseletterMatch(String prefix, String baseletter) +{ + char c; + if(prefix.length()!=1) return false; //No prefix. 
+ else c = prefix.charAt(0); + + switch(c){ + case 'g': + if( (baseletter.equals("c")) || + (baseletter.equals("ny")) || + (baseletter.equals("t")) || + (baseletter.equals("d")) || + (baseletter.equals("n")) || + (baseletter.equals("ts")) || + (baseletter.equals("zh")) || + (baseletter.equals("z")) || + (baseletter.equals("sh")) || + (baseletter.equals("s")) || + (baseletter.equals(".y")) ) + return true; + else return false; + + case 'd': + if( (baseletter.equals("k")) || + (baseletter.equals("p")) || + (baseletter.equals("g")) || + (baseletter.equals("b")) || + (baseletter.equals("ng")) || + (baseletter.equals("m")) ) + return true; + else return false; + + case 'b': + if( (baseletter.equals("c")) || + (baseletter.equals("g")) || + (baseletter.equals("t")) || + (baseletter.equals("d")) || + (baseletter.equals("ts")) || + (baseletter.equals("zh")) || + (baseletter.equals("z")) || + (baseletter.equals("sh")) || + (baseletter.equals("s")) || + (baseletter.equals("k")) ) + return true; + else return false; + + case 'm': + if( (baseletter.equals("kh")) || + (baseletter.equals("ch")) || + (baseletter.equals("th")) || + (baseletter.equals("tsh")) || + (baseletter.equals("g")) || + (baseletter.equals("j")) || + (baseletter.equals("d")) || + (baseletter.equals("dz")) || + (baseletter.equals("ng")) || + (baseletter.equals("ny")) || + (baseletter.equals("n")) ) + return true; + else return false; + + case '\'': + if( (baseletter.equals("kh")) || + (baseletter.equals("ch")) || + (baseletter.equals("th")) || + (baseletter.equals("ph")) || + (baseletter.equals("tsh")) || + (baseletter.equals("g")) || + (baseletter.equals("j")) || + (baseletter.equals("d")) || + (baseletter.equals("b")) || + (baseletter.equals("dz") )) + return true; + else return false; + + + } + return false; +} + + +//To examine a pair of components, see if one of them is a subscript and the other one is +//a base letter that can be followed by the subscript. Return true if it is, otherwise false. 
+boolean BaseletterSubscriptMatch(String baseletter, String subscript) +{ + char c; + if(subscript.length()!=1) return false; //No subscript. + else c = subscript.charAt(0); + + switch(c){ + case 'y': + if( (baseletter.equals("k")) || + (baseletter.equals("kh")) || + (baseletter.equals("g")) || + (baseletter.equals("p")) || + (baseletter.equals("ph")) || + (baseletter.equals("b")) || + (baseletter.equals("m")) ) + return true; + else return false; + + case 'r': + if( (baseletter.equals("k")) || + (baseletter.equals("t")) || + (baseletter.equals("p")) || + (baseletter.equals("kh")) || + (baseletter.equals("ph")) || + (baseletter.equals("g")) || + (baseletter.equals("d")) || + (baseletter.equals("b")) || + (baseletter.equals("h")) || + (baseletter.equals("m")) || + (baseletter.equals("s"))) + return true; + else return false; + + + case 'l': + if( (baseletter.equals("k")) || + (baseletter.equals("g")) || + (baseletter.equals("b")) || + (baseletter.equals("r")) || + (baseletter.equals("s")) || + (baseletter.equals("z") )) + return true; + else return false; + + case 'w': + if( (baseletter.equals("k")) || + (baseletter.equals("kh")) || + (baseletter.equals("g")) || + (baseletter.equals("ny")) || + (baseletter.equals("d")) || + (baseletter.equals("ch")) || + (baseletter.equals("zh")) || + (baseletter.equals("z")) || + (baseletter.equals("r")) || + (baseletter.equals("l")) || + (baseletter.equals("sh")) || + (baseletter.equals("s")) || + (baseletter.equals("h") )) + return true; + else return false; + + } + return false; +} + + +//To examine a pair of components, see if one of them is a superscript and the other one is +//a base letter that can follow the superscript. Return true if it is, otherwise false. +boolean SuperscriptBaseletterMatch(String superscript, String baseletter) +{ + char c; + if(superscript.length()!=1) return false; //No superscript. 
+ else c = superscript.charAt(0); + + switch(c){ + case 'r': + if( (baseletter.equals("k")) || + (baseletter.equals("t")) || + (baseletter.equals("ts")) || + (baseletter.equals("g")) || + (baseletter.equals("j")) || + (baseletter.equals("d")) || + (baseletter.equals("b")) || + (baseletter.equals("dz")) || + (baseletter.equals("ng")) || + (baseletter.equals("ny")) || + (baseletter.equals("n")) || + (baseletter.equals("m") )) + return true; + else return false; + + case 'l': + if( (baseletter.equals("k")) || + (baseletter.equals("c")) || + (baseletter.equals("t")) || + (baseletter.equals("p")) || + (baseletter.equals("g")) || + (baseletter.equals("j")) || + (baseletter.equals("d")) || + (baseletter.equals("b")) || + (baseletter.equals("ng")) || + (baseletter.equals("h") )) + return true; + else return false; + + case 's': + if( (baseletter.equals("k")) || + (baseletter.equals("t")) || + (baseletter.equals("p")) || + (baseletter.equals("ts")) || + (baseletter.equals("g")) || + (baseletter.equals("d")) || + (baseletter.equals("b")) || + (baseletter.equals("ng")) || + (baseletter.equals("ny")) || + (baseletter.equals("n")) || + (baseletter.equals("m"))) + return true; + else return false; + + } + return false; +} + + + +//Assign values for Tibetan Wylie characters for comparison. +int ValueOfTibetanCharacter(String theCharacter){ + + if(theCharacter == null ) return 0; + if(theCharacter.equals("$")) return 0; // For non-presence. 
+ + if(theCharacter.equals("k")) return 1; + if(theCharacter.equals("kh")) return 2; + if(theCharacter.equals("g")) return 3; + if(theCharacter.equals("ng")) return 4; + if(theCharacter.equals("c")) return 5; + if(theCharacter.equals("ch")) return 6; + if(theCharacter.equals("j")) return 7; + if(theCharacter.equals("ny")) return 8; + + + if(theCharacter.equals("T")) return 9; + if(theCharacter.equals("Th")) return 10; + if(theCharacter.equals("D")) return 11; + if(theCharacter.equals("N")) return 12; + + if(theCharacter.equals("t")) return 13; + if(theCharacter.equals("th")) return 14; + if(theCharacter.equals("d")) return 15; + if(theCharacter.equals("n")) return 16; + if(theCharacter.equals("p")) return 17; + if(theCharacter.equals("ph")) return 18; + if(theCharacter.equals("b")) return 19; + if(theCharacter.equals("m")) return 20; + if(theCharacter.equals("ts")) return 21; + if(theCharacter.equals("tsh")) return 22; + if(theCharacter.equals("dz")) return 23; + if(theCharacter.equals("w")) return 24; + if(theCharacter.equals("zh")) return 25; + if(theCharacter.equals("z")) return 26; + if(theCharacter.equals("'")) return 27; + if(theCharacter.equals("y")) return 28; + if(theCharacter.equals(".y")) return 28; + if(theCharacter.equals("r")) return 29; + if(theCharacter.equals("l")) return 30; + if(theCharacter.equals("sh")) return 31; + if(theCharacter.equals("Sh")) return 32; + if(theCharacter.equals("s")) return 33; + if(theCharacter.equals("h")) return 34; + if(theCharacter.equals("a")) return 35; + +// if(theCharacter.equals("a")) return 41; + if(theCharacter.equals("A")) return 42; + if(theCharacter.equals("i")) return 43; + if(theCharacter.equals("I")) return 44; + if(theCharacter.equals("u")) return 47; + if(theCharacter.equals("U")) return 48; + if(theCharacter.equals("-i")) return 45; + if(theCharacter.equals("-I")) return 46; + if(theCharacter.equals("e")) return 49; + if(theCharacter.equals("ai")) return 50; + if(theCharacter.equals("o")) return 51; + 
if(theCharacter.equals("au")) return 52; + + if(theCharacter.equals("invalid")) return 100; + + return 100; +} + + +//This is the key function in the class, which extracts the components of a syllable +//from the Wylie string of the syllable and put them into the order in that we compare +//syllables each other. + +void ItsComponents(){ + + String thisString; + String SyllableByComponents[] = new String[100]; // Syllable consist of and ordered by components represented + // by Tibetan Wylie characters. Assume there are no more than + Components = new String[10][20]; // 20 components in a syllable. + + + int s = 0; + nComponents = 0; // Number of Tibetan characters represented by Wylie system in the syllable. + int i=0; + + //Cut the String of the syllable into the consequence of Tibetan Wylie characters of the syllable. + while ( i= (i+j); j++){ + thisString = theSyllable.substring(i,i+j); + if ( IsThisTibetanOrSanskritSymbol(thisString) ) { s = j; continue;} + if ( theSyllable.length() > (i+j) && j<3 ) continue; + if ( s != 0) break; + else { InValidSyllable(); return; } + } + if ( s == 0) { InValidSyllable(); return; } + + if(theSyllable.substring(i,i+s).equals("+")) { s=0; continue;} //Take off the Sanskrit stacking symbol "+" from the String. + + SyllableByComponents[nComponents++] = theSyllable.substring(i,i+s); + i = i + s; + s = 0; + } + + + int nVowel=0; // Number of vowels in a syllable. + + int nCBV[] = new int[6]; // Number of components before a vowel, assume there are 5 vowels in the syllable. + // Normallly, there is only one vowel, sometimes two vowels in a syllable. + int nCAV[] = new int[6]; // Number of components after vowel, assume there are 5 vowels in the syllable. + // Normallly, there is only one vowel, sometimes two vewls in a syllable. + boolean SanskritFlag = false; // Is the syllable Sanskrit? + + TibetanSyllableFlag = true; + + //Calculate nVowel, nCBV and nCAV. 
+ for(i=0; i=2 ) { //For more than two vowel Tibetan syllable, like "nga'i", "tshu'u": + + int StartPoint = nCBV[0]; + + for(int j=0; j