Jskad/source/org/thdl/tib/text/TibTextUtils.java
dchandler a6cc4a7ff3 Removed/commented out/tagged some unused local variables.
Added a JUnit test for the new Trie that fails at present since the Trie is
case-insensitive.  Running JUnit tests is not something our build system
knows about at present, but Eclipse 2.0 makes it very easy.

Fixed a few compiler errors due to imports I'd forgotten.
2002-11-02 16:01:40 +00:00

1097 lines
33 KiB
Java

/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2001 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text;
import java.util.*;
import javax.swing.*;
import javax.swing.text.*;
import javax.swing.text.rtf.RTFEditorKit;
import java.io.*;
import org.thdl.util.ThdlDebug;
/**
* Provides methods for converting back and forth between Extended
* Wylie and TibetanMachineWeb. This class is not instantiable.
*
* <p>
* The class provides a variety of static methods for converting
* back and forth between Extended Wylie and TibetanMachineWeb. The
* Wylie can be accessed as a String, while the TibetanMachineWeb can
* be exported as Rich Text Format.
*
* @author Edward Garrett, Tibetan and Himalayan Digital Library */
public class TibTextUtils {
/** Do not use this contructor. */
private TibTextUtils() { super(); }
/**
 * Collapses a list of glyphs into runs of same-font text.
 * Consecutive glyphs that share a font number are merged into one
 * {@link DuffData DuffData} holding their characters and that font
 * number, so that consumers (for example HTML output) only have to
 * switch fonts at run boundaries instead of per glyph.
 * @param glyphs the list of TibetanMachineWeb glyphs
 * ({@link DuffCode DuffCodes}) you want to convert
 * @return an array of DuffData, one entry per run, in the order of
 * the input; <code>null</code> if the list is empty
 */
public static DuffData[] convertGlyphs(List glyphs) {
    if (glyphs.isEmpty())
        return null;

    List runs = new ArrayList();
    Iterator it = glyphs.iterator();

    // The first glyph opens the first run.
    DuffCode glyph = (DuffCode) it.next();
    int runFont = glyph.fontNum;
    StringBuffer runText = new StringBuffer();
    runText.append(glyph.character);

    while (it.hasNext()) {
        glyph = (DuffCode) it.next();
        if (glyph.fontNum != runFont) {
            // Font changed: close the current run and open a new one.
            runs.add(new DuffData(runText.toString(), runFont));
            runFont = glyph.fontNum;
            runText = new StringBuffer();
        }
        runText.append(glyph.character);
    }

    // Close the run that was still open when the input ended.
    runs.add(new DuffData(runText.toString(), runFont));
    return (DuffData[]) runs.toArray(new DuffData[runs.size()]);
}
/**
 * Figures out how to arrange a list of characters into glyphs. For example, if the user types 'bsgr'
 * using the Extended Wylie keyboard, this method figures out that this should be represented
 * as a 'b' glyph followed by a 's-g-r' glyph.
 *
 * <p>The method looks for the longest run of characters, anchored at one end of the list, whose
 * joined name is a known glyph. Tibetan stack names are formed by joining the characters with "-",
 * Sanskrit stack names by joining them with "+". For each candidate run the Tibetan name is tried
 * first (if enabled) and then the Sanskrit name (if enabled). The characters outside the run are
 * converted to glyphs one by one.
 *
 * @param chars the list of Tibetan characters (Strings) you want to find glyphs for
 * @param areStacksOnRight whether stacking should try to maximize from right to left (true)
 * or from left to right (false). In the Extended Wylie keyboard, you try to stack from
 * right to left. Thus, the character sequence r-g-r would be stacked as r followed by gr,
 * rather than rg followed by r. In the Sambhota and TCC keyboards, the stack direction
 * is reversed.
 * @param definitelyTibetan should be true if the characters are known to be Tibetan and
 * not Sanskrit
 * @param definitelySanskrit should be true if the characters are known to be Sanskrit and
 * not Tibetan
 * @return the list of glyphs, or <code>null</code> if no run of the characters names a glyph
 * (in particular when <code>chars</code> is empty)
 */
public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
    // Without any hint both kinds of stack are tried; with a hint only the hinted kind(s).
    final boolean unhinted = !definitelyTibetan && !definitelySanskrit;
    final boolean tryTibetan = unhinted || definitelyTibetan;
    final boolean trySanskrit = unhinted || definitelySanskrit;
    final int total = chars.size();

    if (areStacksOnRight) {
        // Candidate runs end at the last character; the longest one (whole list) is tried first.
        for (int from = 0; from < total; from++) {
            String stackName = findStackName(chars, from, total, tryTibetan, trySanskrit);
            if (stackName == null)
                continue;
            List found = new ArrayList();
            //should really check here to make sure glyphs exist FIXME
            for (int k = 0; k < from; k++)
                found.add(TibetanMachineWeb.getGlyph((String) chars.get(k)));
            found.add(TibetanMachineWeb.getGlyph(stackName));
            return found;
        }
    }
    else {
        // Candidate runs start at the first character; the longest one (whole list) is tried first.
        for (int to = total; to > 0; to--) {
            String stackName = findStackName(chars, 0, to, tryTibetan, trySanskrit);
            if (stackName == null)
                continue;
            List found = new ArrayList();
            found.add(TibetanMachineWeb.getGlyph(stackName));
            for (int k = to; k < total; k++)
                found.add(TibetanMachineWeb.getGlyph((String) chars.get(k)));
            return found;
        }
    }
    return null;
}

/**
 * Returns the glyph name under which chars[from..to) is known, trying the Tibetan
 * ("-"-joined) name before the Sanskrit ("+"-joined) one, or null if neither enabled name has a glyph.
 */
private static String findStackName(List chars, int from, int to, boolean tryTibetan, boolean trySanskrit) {
    if (tryTibetan) {
        String tibetanName = joinChars(chars, from, to, "-");
        if (TibetanMachineWeb.hasGlyph(tibetanName))
            return tibetanName;
    }
    if (trySanskrit) {
        String sanskritName = joinChars(chars, from, to, "+");
        if (TibetanMachineWeb.hasGlyph(sanskritName))
            return sanskritName;
    }
    return null;
}

/** Concatenates the Strings chars[from..to), placing separator between neighbours. */
private static String joinChars(List chars, int from, int to, String separator) {
    StringBuffer joined = new StringBuffer();
    for (int k = from; k < to; k++) {
        if (k > from)
            joined.append(separator);
        joined.append((String) chars.get(k));
    }
    return joined.toString();
}
/**
 * Finds the first meaningful element to occur within a string of Extended Wylie.
 * This could be a character, a vowel,
 * punctuation, or formatting. For example, passed the string 'tshapo',
 * this method will return 'tsh'.
 *
 * <p>A leading character with a code below 32, the Wylie disambiguating key
 * and the Sanskrit stacking key are each returned on their own. Otherwise
 * prefixes of increasing length are tested until one is recognized as
 * punctuation, a vowel or a character; that prefix is then extended for as
 * long as it stays recognized, and the last recognized prefix is returned.
 *
 * @param wylie the String of wylie you want to scan; must not be null
 * @return the next meaningful subpart of this string, or null if
 * no meaningful subpart can be found (for example 'x' has no equivalent
 * in Extended Wylie, and an empty string has no subpart at all)
 */
public static String getNext(String wylie) {
    final int length = wylie.length();
    if (length == 0)
        return null; // nothing to scan; charAt(0) below would otherwise throw

    char first = wylie.charAt(0);
    if (first < 32) // formatting (control) characters are handed back unchanged
        return String.valueOf(first);
    if (first == TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY)
        return String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
    if (first == TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY)
        return String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY);

    // End index (exclusive) of the last recognized prefix; -1 while none has been seen.
    int recognizedEnd = -1;
    for (int end = 1; end <= length; end++) {
        String prefix = wylie.substring(0, end);
        boolean recognized = TibetanMachineWeb.isWyliePunc(prefix)
                || TibetanMachineWeb.isWylieVowel(prefix)
                || TibetanMachineWeb.isWylieChar(prefix);
        if (recognized)
            recognizedEnd = end;
        else if (recognizedEnd != -1)
            break; // first unrecognized prefix after a recognized one: stop extending
    }

    if (recognizedEnd == -1)
        return null;
    return wylie.substring(0, recognizedEnd);
}
/**
 * Converts a string of Extended Wylie into {@link DuffData DuffData}.
 *
 * <p>The input is consumed one element at a time (see {@link #getNext(String)}).
 * Consonants are collected until something ends the stack (a vowel,
 * punctuation, the disambiguating key, formatting, or the end of a
 * Sanskrit stack); the collected consonants are then turned into glyphs
 * and vowels or bindus are attached to the most recent glyph.
 *
 * <p>When the Wylie cannot be converted, a short diagnostic of up to five
 * characters starting at the offending position is printed to
 * <code>System.out</code> before the exception is thrown.
 *
 * @param wylie the Wylie you want to convert
 * @return an array of TibetanMachineWeb data
 * corresponding to the Wylie text, as produced by
 * {@link #convertGlyphs(List)} for the resulting glyph list
 * @throws InvalidWylieException if the Wylie is deemed invalid,
 * i.e. if it does not conform to the Extended Wylie standard
 */
public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException {
    List chars = new ArrayList();         // consonants of the stack currently being collected
    LinkedList glyphs = new LinkedList(); // glyphs produced so far; LinkedList for getLast/removeLast
    DuffCode dc;
    int start = 0;
    boolean isSanskrit = false;
    boolean wasLastSanskritStackingKey = false;

    while (start < wylie.length()) {
        String next = getNext(wylie.substring(start));

        if (next == null) {
            if (!chars.isEmpty()) {
                // Flush what has been collected. 'start' is not advanced, so the next
                // pass meets the same text with nothing pending and reports it as bad.
                glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
                chars.clear();
                isSanskrit = false;
            }
            else { //could not convert - throw exception
                // The end index of substring is absolute, so it has to be start+5 (capped
                // at the end of the text), not 5.
                int snippetEnd = Math.min(start + 5, wylie.length());
                System.out.println("Bad wylie: " + wylie.substring(start, snippetEnd));
                throw new InvalidWylieException(wylie, start);
            }
        }
        else if (TibetanMachineWeb.isWyliePunc(next)) {
            if (!chars.isEmpty())
                glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
            chars.clear();

            if (next.equals(String.valueOf(TibetanMachineWeb.BINDU))) {
                // A bindu attaches to the previous glyph, if there is one.
                if (glyphs.isEmpty())
                    dc = null;
                else
                    dc = (DuffCode) glyphs.removeLast();
                glyphs.addAll(getBindu(dc));
            }
            else {
                dc = TibetanMachineWeb.getGlyph(next);
                glyphs.add(dc);
            }
            isSanskrit = false;
        }
        else if (TibetanMachineWeb.isWylieVowel(next)) {
            if (!chars.isEmpty()) {
                // The vowel attaches to the last glyph of the stack just completed.
                glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
                dc = (DuffCode) glyphs.removeLast();
                glyphs.addAll(getVowel(dc, next));
                chars.clear();
            }
            else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel
                boolean attachedToPrevious = false;
                if (glyphs.size() > 1) {
                    dc = (DuffCode) glyphs.getLast();
                    if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc))) {
                        // Second vowel in a row: hand over the last two glyphs as context.
                        DuffCode dc_2 = (DuffCode) glyphs.removeLast();
                        DuffCode dc_1 = (DuffCode) glyphs.removeLast();
                        glyphs.addAll(getVowel(dc_1, dc_2, next));
                        attachedToPrevious = true;
                    }
                }
                if (!attachedToPrevious) {
                    // No usable context: the vowel is carried by an achen.
                    DuffCode[] dc_array = (DuffCode[]) TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
                    dc = dc_array[TibetanMachineWeb.TMW];
                    glyphs.addAll(getVowel(dc, next));
                }
                chars.clear();
            }
            isSanskrit = false;
        }
        else if (TibetanMachineWeb.isWylieChar(next)) {
            if (!isSanskrit) //add char to list - it is not sanskrit
                chars.add(next);
            else if (wasLastSanskritStackingKey) { //add char to list - it is still part of sanskrit stack
                chars.add(next);
                wasLastSanskritStackingKey = false;
            }
            else { //char is no longer part of sanskrit stack, therefore compute and add previous stack
                glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
                chars.clear();
                chars.add(next);
                isSanskrit = false;
                wasLastSanskritStackingKey = false;
            }
        }
        else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY))) {
            if (!chars.isEmpty())
                glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
            chars.clear();
            isSanskrit = false;
        }
        else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY))) {
            if (!isSanskrit) { //begin sanskrit stack
                switch (chars.size()) {
                    case 0:
                        break; //'+' is not "pre-stacking" key

                    case 1:
                        isSanskrit = true;
                        wasLastSanskritStackingKey = true;
                        break;

                    default:
                        // Only the last collected character belongs to the Sanskrit stack;
                        // everything before it is converted on its own first.
                        String top_char = (String) chars.get(chars.size() - 1);
                        chars.remove(chars.size() - 1);
                        glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
                        chars.clear();
                        chars.add(top_char);
                        isSanskrit = true;
                        wasLastSanskritStackingKey = true;
                        break;
                }
            }
        }
        else if (TibetanMachineWeb.isFormatting(next.charAt(0))) {
            if (!chars.isEmpty())
                glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
            dc = new DuffCode(1, next.charAt(0));
            glyphs.add(dc);
            chars.clear();
            isSanskrit = false;
        }

        // next is null only after the flush above; the position then stays where it is.
        if (next != null)
            start += next.length();
    }

    // Convert whatever is still pending at the end of the input.
    if (!chars.isEmpty()) {
        glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
        chars.clear();
    }

    return convertGlyphs(glyphs);
}
/**
 * Gets the bindu sequence for a given context.
 * In the TibetanMachineWeb fonts, bindu (anusvara) is realized
 * differently depending on which vowel it attaches to. Although
 * the default bindu glyph is affixed to consonants and subscript vowels,
 * for superscript vowels (i, e, o, etc), there is a single glyph
 * which merges the bindu and that vowel together.
 *
 * <p>The result is one of:
 * <ul>
 * <li>the default bindu glyph alone, when <code>dc</code> is null;</li>
 * <li>the composite glyph registered for <code>dc</code> in the bindu map,
 * when there is one;</li>
 * <li><code>dc</code> followed by the default bindu glyph otherwise.</li>
 * </ul>
 * Note that there is only one glyph in the context. This means that
 * bindus will not affix properly if superscript vowels are allowed to directly
 * precede subscript vowels (e.g. pou).
 * @param dc the DuffCode of the glyph you
 * want to attach a bindu to, or null for a free-standing bindu
 * @return a List of DuffCode glyphs as described above
 */
public static List getBindu(DuffCode dc) {
    List result = new ArrayList();
    if (dc != null && TibetanMachineWeb.getBinduMap().containsKey(dc)) {
        // A merged vowel+bindu glyph replaces the original glyph.
        result.add((DuffCode) TibetanMachineWeb.getBinduMap().get(dc));
    }
    else {
        // Keep the original glyph (if any) and put the default bindu after it.
        if (dc != null)
            result.add(dc);
        result.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU)));
    }
    return result;
}
/**
 * Gets the vowel sequence for a given vowel after a single glyph of context.
 * This is the three-argument
 * {@link #getVowel(DuffCode, DuffCode, String)} called with no glyph
 * before <code>context</code>.
 * @param context the glyph preceding the vowel you want to affix
 * @param vowel the vowel you want to affix, in Wylie
 * @return a List of glyphs equal to the vowel in context
 */
public static List getVowel(DuffCode context, String vowel) {
    final DuffCode noEarlierGlyph = null;
    return getVowel(noEarlierGlyph, context, vowel);
}
/**
 * Gets the vowel sequence for a given vowel in a given context.
 * Given a context, this method affixes a vowel and returns the context plus the vowel.
 * Since the choice of vowel glyph depends on the consonant to which it is attached,
 * generally it is enough to provide just the immediately preceding context. However,
 * in some cases, double vowels are allowed - for example 'buo'. To find the correct
 * glyph for 'o', we need 'b' in this case, not 'u'. Note also that some Extended
 * Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For example,
 * the vowel I consists of both an achung and a reverse gigu. All required glyphs
 * are part of the returned List.
 *
 * <p>The returned list always starts with <code>context_1</code> (when it is not null)
 * followed by <code>context_2</code> or, for the vowels u, A, U, I and reverse I, by the
 * half-height variant of <code>context_2</code> when one exists. The vowel glyph(s),
 * if any were found, come last.
 *
 * @param context_1 the glyph occurring two glyphs before the vowel you want to affix,
 * or null if there is none
 * @param context_2 the glyph immediately before the vowel you want to affix
 * @param vowel the vowel you want to affix, in Wylie
 * @return a List of glyphs equal to the vowel in context, or <code>null</code> if
 * <code>vowel</code> is not one of the vowels handled here
 */
public static List getVowel(DuffCode context_1, DuffCode context_2, String vowel) {
    //this vowel doesn't correspond to a glyph (or the context already
    //carries a top vowel) - so you just return the original context
    if (vowel.equals(TibetanMachineWeb.WYLIE_aVOWEL) || TibetanMachineWeb.isTopVowel(context_2))
        return vowelContext(context_1, context_2);

    //first, the three easiest cases: ai, au, and <i
    //these vowels have one invariant form - therefore,
    //the context is just returned along with that form

    if (vowel.equals(TibetanMachineWeb.ai_VOWEL)) {
        List out = vowelContext(context_1, context_2);
        DuffCode[] forms = (DuffCode[]) TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ai_VOWEL);
        out.add(forms[TibetanMachineWeb.TMW]);
        return out;
    }

    if (vowel.equals(TibetanMachineWeb.au_VOWEL)) {
        List out = vowelContext(context_1, context_2);
        DuffCode[] forms = (DuffCode[]) TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.au_VOWEL);
        out.add(forms[TibetanMachineWeb.TMW]);
        return out;
    }

    if (vowel.equals(TibetanMachineWeb.reverse_i_VOWEL)) {
        List out = vowelContext(context_1, context_2);
        if (!TibetanMachineWeb.isTopVowel(context_2)) {
            DuffCode[] forms = (DuffCode[]) TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.reverse_i_VOWEL);
            out.add(forms[TibetanMachineWeb.TMW]);
        }
        return out;
    }

    //second, the vowels i, e, and o
    //these vowels have many different glyphs each,
    //whose correct selection depends on the
    //preceding context. the glyph is looked up for
    //context_2 first and, failing that, for context_1

    if (vowel.equals(TibetanMachineWeb.i_VOWEL)) {
        DuffCode mark = TibetanMachineWeb.getVowel(TibetanMachineWeb.getHashKeyForGlyph(context_2), TibetanMachineWeb.VOWEL_i);
        if (mark == null && context_1 != null)
            mark = TibetanMachineWeb.getVowel(TibetanMachineWeb.getHashKeyForGlyph(context_1), TibetanMachineWeb.VOWEL_i);
        return vowelContextPlus(context_1, context_2, mark);
    }

    if (vowel.equals(TibetanMachineWeb.e_VOWEL)) {
        DuffCode mark = TibetanMachineWeb.getVowel(TibetanMachineWeb.getHashKeyForGlyph(context_2), TibetanMachineWeb.VOWEL_e);
        if (mark == null && context_1 != null)
            mark = TibetanMachineWeb.getVowel(TibetanMachineWeb.getHashKeyForGlyph(context_1), TibetanMachineWeb.VOWEL_e);
        return vowelContextPlus(context_1, context_2, mark);
    }

    if (vowel.equals(TibetanMachineWeb.o_VOWEL)) {
        DuffCode mark = TibetanMachineWeb.getVowel(TibetanMachineWeb.getHashKeyForGlyph(context_2), TibetanMachineWeb.VOWEL_o);
        if (mark == null && context_1 != null)
            mark = TibetanMachineWeb.getVowel(TibetanMachineWeb.getHashKeyForGlyph(context_1), TibetanMachineWeb.VOWEL_o);
        return vowelContextPlus(context_1, context_2, mark);
    }

    //next come the vowels u, A, and U
    //these three vowels are grouped together because they all
    //can cause the preceding context to change. in particular,
    //both u and A cannot be affixed to ordinary k or g, but
    //rather the shortened versions of k and g

    if (vowel.equals(TibetanMachineWeb.u_VOWEL)) {
        String key = TibetanMachineWeb.getHashKeyForGlyph(context_2);
        DuffCode shortForm = TibetanMachineWeb.getHalfHeightGlyph(key);
        DuffCode mark = TibetanMachineWeb.getVowel(key, TibetanMachineWeb.VOWEL_u);
        return vowelContextPlus(context_1, shortForm == null ? context_2 : shortForm, mark);
    }

    if (vowel.equals(TibetanMachineWeb.A_VOWEL)) {
        String key = TibetanMachineWeb.getHashKeyForGlyph(context_2);
        DuffCode shortForm = TibetanMachineWeb.getHalfHeightGlyph(key);
        DuffCode mark = TibetanMachineWeb.getVowel(key, TibetanMachineWeb.VOWEL_A);
        return vowelContextPlus(context_1, shortForm == null ? context_2 : shortForm, mark);
    }

    if (vowel.equals(TibetanMachineWeb.U_VOWEL)) {
        String key = TibetanMachineWeb.getHashKeyForGlyph(context_2);
        DuffCode shortForm = TibetanMachineWeb.getHalfHeightGlyph(key);
        DuffCode mark = TibetanMachineWeb.getVowel(key, TibetanMachineWeb.VOWEL_U);
        List out = vowelContext(context_1, shortForm == null ? context_2 : shortForm);
        if (mark != null && !TibetanMachineWeb.isTopVowel(context_2))
            out.add(mark);
        return out;
    }

    //finally, the vowels I and <I
    //these vowels are unique in that they both
    //require a change from the previous character,
    //and consist of two glyphs themselves; the two
    //glyphs are only added when both were found

    if (vowel.equals(TibetanMachineWeb.I_VOWEL)) {
        String key = TibetanMachineWeb.getHashKeyForGlyph(context_2);
        DuffCode shortForm = TibetanMachineWeb.getHalfHeightGlyph(key);
        DuffCode lower = TibetanMachineWeb.getVowel(key, TibetanMachineWeb.VOWEL_A);
        DuffCode upper = TibetanMachineWeb.getVowel(key, TibetanMachineWeb.VOWEL_i);
        List out = vowelContext(context_1, shortForm == null ? context_2 : shortForm);
        if (lower != null && upper != null) {
            out.add(lower);
            out.add(upper);
        }
        return out;
    }

    if (vowel.equals(TibetanMachineWeb.reverse_I_VOWEL)) {
        String key = TibetanMachineWeb.getHashKeyForGlyph(context_2);
        DuffCode shortForm = TibetanMachineWeb.getHalfHeightGlyph(key);
        DuffCode lower = TibetanMachineWeb.getVowel(key, TibetanMachineWeb.VOWEL_A);
        DuffCode[] forms = (DuffCode[]) TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.reverse_i_VOWEL);
        DuffCode upper = forms[TibetanMachineWeb.TMW];
        List out = vowelContext(context_1, shortForm == null ? context_2 : shortForm);
        if (lower != null && upper != null) {
            out.add(lower);
            out.add(upper);
        }
        return out;
    }

    return null;
}

/** Starts a result list with the earlier glyph (when not null) followed by the base glyph. */
private static List vowelContext(DuffCode earlier, DuffCode base) {
    List glyphs = new ArrayList();
    if (earlier != null)
        glyphs.add(earlier);
    glyphs.add(base);
    return glyphs;
}

/** Same as vowelContext, with the vowel glyph appended when it is not null. */
private static List vowelContextPlus(DuffCode earlier, DuffCode base, DuffCode mark) {
    List glyphs = vowelContext(earlier, base);
    if (mark != null)
        glyphs.add(mark);
    return glyphs;
}
/**
 * Scans a list of glyphs and returns an Extended Wylie string with 'a' inserted.
 * Passed a list of TibetanMachineWeb glyphs that constitute a partial
 * syllable that is in need of an 'a' vowel, this method scans the list,
 * figures out where to put the 'a' vowel, and then returns a string
 * of Wylie corresponding to this sequence. This method is used
 * heavily during TibetanMachineWeb to Extended Wylie conversion,
 * since there is no glyph corresponding to the Extended Wylie 'a' vowel.
 *
 * <p>No 'a' is written next to an achen. Where an 'a' would otherwise
 * follow an achen that is not the first glyph, the disambiguating key is
 * written before the achen instead. The disambiguating key is also written
 * between 'g' and a following 'y'.
 *
 * @param glyphList a list of TibetanMachine glyphs, i.e. {@link org.thdl.tib.text.DuffCode DuffCodes}.
 * @return the Wylie string corresponding to this glyph list, with 'a' inserted;
 * the empty string for an empty list
 */
public static String withA(java.util.List glyphList) {
    final int size = glyphList.size();
    if (size == 0)
        return "";

    StringBuffer out = new StringBuffer();

    if (size == 1) { //only one character: 'a' goes after it
        String only = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(0));
        out.append(only);
        if (!only.equals(TibetanMachineWeb.ACHEN))
            out.append(TibetanMachineWeb.WYLIE_aVOWEL);
        return out.toString();
    }

    if (size == 2) { //two characters: 'a' either goes after first or after both
        String first = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(0));
        out.append(first);
        String second = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(1));
        if (TibetanMachineWeb.isWylieRight(second)) {
            if (!first.equals(TibetanMachineWeb.ACHEN))
                out.append(TibetanMachineWeb.WYLIE_aVOWEL);
            out.append(second);
        }
        else {
            //note: "g" and "y" should not be hard-coded
            //  instead, TibetanMachineWeb should introduce relevant sets
            if (first.equals("g") && second.equals("y"))
                out.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
            appendFollowedByA(out, second);
        }
        return out.toString();
    }

    //three or more characters: 'a' goes before last two, between last two, or in final position

    // Everything except the last two glyphs is written out without a vowel.
    String previous = "";
    for (int idx = 0; idx < size - 2; idx++) {
        String current = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(idx));
        if (previous.equals("g") && current.equals("y")
                || !previous.equals("") && current.equals(TibetanMachineWeb.ACHEN))
            out.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
        out.append(current);
        previous = current;
    }

    String secondToLast = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(size - 2));
    String last = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(size - 1));

    if (TibetanMachineWeb.isWylieLeft(previous) && TibetanMachineWeb.isWylieRight(last)) {
        // 'a' between the last two
        if (previous.equals("g") && secondToLast.equals("y"))
            out.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
        appendFollowedByA(out, secondToLast);
        out.append(last);
    }
    else if (TibetanMachineWeb.isWylieRight(secondToLast) && TibetanMachineWeb.isWylieFarRight(last)) {
        // 'a' before the last two
        if (!previous.equals(TibetanMachineWeb.ACHEN))
            out.append(TibetanMachineWeb.WYLIE_aVOWEL);
        out.append(secondToLast);
        out.append(last);
    }
    else {
        // 'a' in final position
        out.append(secondToLast);
        if (secondToLast.equals("g") && last.equals("y"))
            out.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
        appendFollowedByA(out, last);
    }
    return out.toString();
}

/**
 * Appends wylie followed by the 'a' vowel; for an achen, appends the
 * disambiguating key followed by the achen instead (no 'a').
 */
private static void appendFollowedByA(StringBuffer out, String wylie) {
    if (wylie.equals(TibetanMachineWeb.ACHEN)) {
        out.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
        out.append(wylie);
    }
    else {
        out.append(wylie);
        out.append(TibetanMachineWeb.WYLIE_aVOWEL);
    }
}
/**
 * Gets the Extended Wylie for a list of glyphs.
 * Passed a list of TibetanMachineWeb glyphs that constitute a partial
 * or complete syllable, this method writes out the Wylie of each glyph
 * in order. No 'a' vowel is inserted because it is assumed that the
 * glyph list already contains some other vowel. If the glyph list does
 * not already contain a vowel, then this method should not be called.
 *
 * <p>The disambiguating key is written between 'g' and a following 'y',
 * and before any achen that is not the first glyph.
 *
 * @param glyphList a list of TibetanMachine glyphs, i.e. {@link org.thdl.tib.text.DuffCode DuffCodes}
 * @return the Wylie string corresponding to this glyph list
 */
public static String withoutA(java.util.List glyphList) {
    StringBuffer out = new StringBuffer();
    String previous = "";
    for (int idx = 0, count = glyphList.size(); idx < count; idx++) {
        String current = TibetanMachineWeb.getWylieForGlyph((DuffCode) glyphList.get(idx));
        //note: "g" and "y" should not be hard-coded
        //  instead, TibetanMachineWeb should introduce relevant sets
        if (previous.equals("g") && current.equals("y")
                || !previous.equals("") && current.equals(TibetanMachineWeb.ACHEN))
            out.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
        out.append(current);
        previous = current;
    }
    return out.toString();
}
/**
* Gets the Extended Wylie for a set of glyphs.
* The glyphs are read in order. Consonants and stacks are collected in a
* pending list and written out when a vowel, punctuation, a bindu, a
* glyph with a character number below 32, or the end of the input is
* reached. A pending list that never received an explicit vowel is
* written with {@link #withA(java.util.List)}, otherwise with
* {@link #withoutA(java.util.List)}.
* @param dcs an array of glyphs
* @return the Extended Wylie corresponding to these glyphs, or null if
* <code>dcs</code> is empty or no Wylie text was produced
*/
public static String getWylie(DuffCode[] dcs) {
if (dcs.length == 0)
return null;
char ch;
String wylie;
// consonants/stacks seen since the last flush, not yet written to wylieBuffer
List glyphList = new ArrayList();
// true while the pending glyphs have not been given an explicit vowel
boolean needsVowel = true;
// true right after a vowel has been written; used below to merge A + i into I
boolean isLastVowel = false;
int start = 0;
StringBuffer wylieBuffer = new StringBuffer();
for (int i=start; i<dcs.length; i++) {
ch = dcs[i].character;
int k = dcs[i].charNum;
// int fontNum = dcs[i].fontNum;
// character numbers below 32 are not looked up as Wylie: pending glyphs
// are flushed and the glyph's character is copied to the output as is
if (k < 32) {
if (wylieBuffer.length() > 0 || !glyphList.isEmpty()) {
if (needsVowel)
wylieBuffer.append(withA(glyphList));
else
wylieBuffer.append(withoutA(glyphList));
glyphList.clear();
needsVowel = true;
isLastVowel = false;
}
wylieBuffer.append(ch);
}
else {
wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i]);
boolean containsBindu = false;
// a multi-character Wylie value ending in the bindu character is split:
// the bindu is cut off here and written after the rest has been handled
if (wylie.length() > 1 && wylie.charAt(wylie.length()-1) == TibetanMachineWeb.BINDU) {
char[] cArray = wylie.toCharArray();
wylie = new String(cArray, 0, wylie.length()-1);
containsBindu = true;
}
// labeled block so that the I / reverse-I merge below can skip the rest
// of the classification while still reaching the bindu handling after it
process_block: {
if (TibetanMachineWeb.isWyliePunc(wylie)) {
isLastVowel = false;
if (glyphList.isEmpty())
wylieBuffer.append(wylie);
else {
if (needsVowel)
wylieBuffer.append(withA(glyphList));
else
wylieBuffer.append(withoutA(glyphList));
wylieBuffer.append(wylie); //append the punctuation
glyphList.clear();
}
needsVowel = true; //next consonants are syllable onset, so we are awaiting vowel
}
//isChar must come before isVowel because ACHEN has priority over WYLIE_aVOWEL
else if (TibetanMachineWeb.isWylieChar(wylie)) {
isLastVowel = false;
glyphList.add(dcs[i]);
}
else if (TibetanMachineWeb.isWylieVowel(wylie)) {
if (isLastVowel) {
// a vowel directly after another vowel: if the output ends in A and
// this vowel is i (or reverse i), the two are rewritten as I (or reverse I)
int len = wylieBuffer.length();
int A_len = TibetanMachineWeb.A_VOWEL.length();
if (wylieBuffer.substring(len-A_len).equals(TibetanMachineWeb.A_VOWEL)) {
try {
if (wylie.equals(TibetanMachineWeb.i_VOWEL)) {
wylieBuffer.delete(len-A_len, len);
wylieBuffer.append(TibetanMachineWeb.I_VOWEL);
isLastVowel = false;
break process_block;
}
else if (wylie.equals(TibetanMachineWeb.reverse_i_VOWEL)) {
wylieBuffer.delete(len-A_len, len);
wylieBuffer.append(TibetanMachineWeb.reverse_I_VOWEL);
isLastVowel = false;
break process_block;
}
}
catch (StringIndexOutOfBoundsException se) {
ThdlDebug.noteIffyCode();
}
wylieBuffer.append(wylie); //append current vowel
isLastVowel = false;
}
else
wylieBuffer.append(wylie); //append current vowel
}
else {
// first vowel after the pending consonants
int glyphCount = glyphList.size();
boolean insertDisAmbig = false;
if (0 != glyphCount) {
DuffCode top_dc = (DuffCode)glyphList.get(glyphCount-1);
String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc);
// an achen directly before the vowel is dropped from the pending list;
// if other glyphs precede it, the disambiguating key is written before
// the vowel instead
if (top_wylie.equals(TibetanMachineWeb.ACHEN)) {
glyphList.remove(glyphCount-1);
if (glyphCount-1 == 0)
top_dc = null;
else {
insertDisAmbig = true;
top_dc = (DuffCode)glyphList.get(glyphCount-2);
}
}
if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc).equals(TibetanMachineWeb.ACHUNG))
wylieBuffer.append(withoutA(glyphList)); //append consonants in glyphList
else {
// the last pending glyph is an achung: the glyphs before it (if any)
// are written with an 'a', then the achung itself
glyphCount = glyphList.size();
glyphList.remove(glyphCount-1);
if (glyphCount-1 != 0)
wylieBuffer.append(withA(glyphList));
wylieBuffer.append(TibetanMachineWeb.ACHUNG);
}
}
if (insertDisAmbig)
wylieBuffer.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
wylieBuffer.append(wylie); //append vowel
glyphList.clear();
isLastVowel = true;
needsVowel = false;
}
}
else { //must be a stack
isLastVowel = false;
glyphList.add(dcs[i]);
}
}
// write the bindu that was split off above, after whatever is still pending
if (containsBindu) {
isLastVowel = false;
wylieBuffer.append(withoutA(glyphList));
wylieBuffer.append(TibetanMachineWeb.BINDU); //append the bindu
glyphList.clear();
}
}
}
//replace TMW with Wylie
// flush glyphs still pending at the end of the input
if (!glyphList.isEmpty()) {
if (needsVowel)
wylieBuffer.append(withA(glyphList));
else
wylieBuffer.append(withoutA(glyphList));
}
if (wylieBuffer.length() > 0)
return wylieBuffer.toString();
else
return null;
}
}