2002-11-02 03:38:59 +00:00
|
|
|
/*
|
|
|
|
The contents of this file are subject to the THDL Open Community License
|
|
|
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
|
|
|
with the License. You may obtain a copy of the License on the THDL web site
|
|
|
|
(http://www.thdl.org/).
|
|
|
|
|
|
|
|
Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
|
|
|
License for the specific terms governing rights and limitations under the
|
|
|
|
License.
|
|
|
|
|
|
|
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
2003-03-30 02:31:16 +00:00
|
|
|
Library (THDL). Portions created by the THDL are Copyright 2001-2003 THDL.
|
2002-11-02 03:38:59 +00:00
|
|
|
All Rights Reserved.
|
|
|
|
|
|
|
|
Contributor(s): ______________________________________.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package org.thdl.tib.text;
|
|
|
|
|
|
|
|
import java.util.*;
|
|
|
|
import javax.swing.*;
|
|
|
|
import javax.swing.text.*;
|
|
|
|
import javax.swing.text.rtf.RTFEditorKit;
|
|
|
|
import java.io.*;
|
|
|
|
|
|
|
|
import org.thdl.util.ThdlDebug;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Provides methods for converting back and forth between Extended
|
|
|
|
* Wylie and TibetanMachineWeb. This class is not instantiable.
|
|
|
|
*
|
|
|
|
* <p>
|
|
|
|
* The class provides a variety of static methods for converting
|
|
|
|
* back and forth between Extended Wylie and TibetanMachineWeb. The
|
|
|
|
* Wylie can be accessed as a String, while the TibetanMachineWeb can
|
|
|
|
* be exported as Rich Text Format.
|
|
|
|
*
|
|
|
|
* @author Edward Garrett, Tibetan and Himalayan Digital Library */
|
|
|
|
public class TibTextUtils {
|
|
|
|
/** Do not use this contructor. */
|
|
|
|
private TibTextUtils() { super(); }
|
|
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts a list of glyphs into an array of {@link DuffData DuffData}.
|
|
|
|
* The motivation for this is that most processes - for example using
|
|
|
|
* TibetanMachineWeb in HTML - only need to know what
|
|
|
|
* text to output, and when to change fonts. In general, they don't
|
|
|
|
* need to have an explicit indication for each glyph of the font
|
|
|
|
* for that glyph.
|
|
|
|
* @param glyphs the list of TibetanMachineWeb glyphs
|
|
|
|
* you want to convert
|
|
|
|
* @return an array of DuffData corresponding to this
|
|
|
|
* list of glyphs
|
|
|
|
*/
|
|
|
|
public static DuffData[] convertGlyphs(List glyphs) {
|
|
|
|
if (glyphs.size() == 0)
|
|
|
|
return null;
|
|
|
|
List data = new ArrayList();
|
|
|
|
StringBuffer sb = new StringBuffer();
|
|
|
|
Iterator iter = glyphs.iterator();
|
|
|
|
DuffCode dc = (DuffCode)iter.next();
|
|
|
|
int lastfont = dc.fontNum;
|
|
|
|
sb.append(dc.character);
|
|
|
|
|
|
|
|
while (iter.hasNext()) {
|
|
|
|
dc = (DuffCode)iter.next();
|
|
|
|
if (dc.fontNum == lastfont)
|
|
|
|
sb.append(dc.character);
|
|
|
|
else {
|
|
|
|
data.add(new DuffData(sb.toString(), lastfont));
|
|
|
|
lastfont = dc.fontNum;
|
|
|
|
sb = new StringBuffer();
|
|
|
|
sb.append(dc.character);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
data.add(new DuffData(sb.toString(), lastfont));
|
|
|
|
|
|
|
|
DuffData[] dd = new DuffData[0];
|
|
|
|
dd = (DuffData[])data.toArray(dd);
|
|
|
|
return dd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Figures out how to arrange a list of characters into glyphs. For example, if the user types 'bsgr'
|
|
|
|
* using the Extended Wylie keyboard, this method figures out that this should be represented
|
|
|
|
* as a 'b' glyph followed by a 's-g-r' glyph. If you know that the characters do not
|
|
|
|
* contain Sanskrit stacks, or do not contain Tibetan stacks, then you can specify this
|
|
|
|
* to speed the process up. Otherwise, the method will first check to see if the characters
|
|
|
|
* correspond to any Tibetan stacks, and if not, then it will check for Sanskrit stacks.
|
|
|
|
* @param chars the list of Tibetan characters you want to find glyphs for
|
|
|
|
* @param areStacksOnRight whether stacking should try to maximize from right to left (true)
|
|
|
|
* or from left to right (false). In the Extended Wylie keyboard, you try to stack from
|
|
|
|
* right to left. Thus, the character sequence r-g-r would be stacked as r followed by gr,
|
|
|
|
* rather than rg followed by r. In the Sambhota and TCC keyboards, the stack direction
|
|
|
|
* is reversed.
|
|
|
|
* @param definitelyTibetan should be true if the characters are known to be Tibetan and
|
|
|
|
* not Sanskrit
|
|
|
|
* @param definitelySanskrit should be true if the characters are known to be Sanskrit and
|
|
|
|
* not Tibetan
|
|
|
|
*/
|
|
|
|
public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
|
|
|
|
StringBuffer tibBuffer, sanBuffer;
|
|
|
|
String tibCluster, sanCluster;
|
|
|
|
|
|
|
|
boolean checkTibetan, checkSanskrit;
|
|
|
|
|
|
|
|
if (!(definitelyTibetan || definitelySanskrit)) {
|
|
|
|
checkTibetan = true;
|
|
|
|
checkSanskrit = true;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
checkTibetan = definitelyTibetan;
|
|
|
|
checkSanskrit = definitelySanskrit;
|
|
|
|
}
|
|
|
|
|
|
|
|
int length = chars.size();
|
|
|
|
|
|
|
|
List glyphs = new ArrayList();
|
|
|
|
glyphs.clear();
|
|
|
|
|
|
|
|
if (areStacksOnRight) {
|
|
|
|
for (int i=0; i<length; i++) {
|
|
|
|
tibBuffer = new StringBuffer();
|
|
|
|
tibCluster = null;
|
|
|
|
|
|
|
|
sanBuffer = new StringBuffer();
|
|
|
|
sanCluster = null;
|
|
|
|
|
|
|
|
for (int k=i; k<length; k++) {
|
|
|
|
String s = (String)chars.get(k);
|
|
|
|
|
|
|
|
if (checkTibetan)
|
|
|
|
tibBuffer.append(s);
|
|
|
|
|
|
|
|
if (checkSanskrit)
|
|
|
|
sanBuffer.append(s);
|
|
|
|
|
|
|
|
if (k!=length-1) {
|
|
|
|
if (checkTibetan)
|
|
|
|
tibBuffer.append("-");
|
|
|
|
|
|
|
|
if (checkSanskrit)
|
|
|
|
sanBuffer.append("+");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (checkTibetan) {
|
|
|
|
tibCluster = tibBuffer.toString();
|
|
|
|
|
|
|
|
if (TibetanMachineWeb.hasGlyph(tibCluster)) {
|
|
|
|
Iterator iter = chars.iterator();
|
|
|
|
for (int k=0; k<i; k++) //should really check here to make sure glyphs exist FIXME
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
|
|
|
|
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph(tibCluster));
|
|
|
|
return glyphs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (checkSanskrit) {
|
|
|
|
sanCluster = sanBuffer.toString();
|
|
|
|
|
|
|
|
if (TibetanMachineWeb.hasGlyph(sanCluster)) {
|
|
|
|
Iterator iter = chars.iterator();
|
|
|
|
for (int k=0; k<i; k++) //should really check here to make sure glyphs exist FIXME
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
|
|
|
|
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph(sanCluster));
|
|
|
|
return glyphs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
for (int i=length-1; i>-1; i--) {
|
|
|
|
tibBuffer = new StringBuffer();
|
|
|
|
tibCluster = null;
|
|
|
|
|
|
|
|
sanBuffer = new StringBuffer();
|
|
|
|
sanCluster = null;
|
|
|
|
|
|
|
|
Iterator iter = chars.iterator();
|
|
|
|
|
|
|
|
for (int k=0; k<i+1; k++) {
|
|
|
|
String s = (String)iter.next();
|
|
|
|
|
|
|
|
if (checkTibetan)
|
|
|
|
tibBuffer.append(s);
|
|
|
|
|
|
|
|
if (checkSanskrit)
|
|
|
|
sanBuffer.append(s);
|
|
|
|
|
|
|
|
if (k!=i) {
|
|
|
|
if (checkTibetan)
|
|
|
|
tibBuffer.append("-");
|
|
|
|
|
|
|
|
if (checkSanskrit)
|
|
|
|
sanBuffer.append("+");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (checkTibetan) {
|
|
|
|
tibCluster = tibBuffer.toString();
|
|
|
|
|
|
|
|
if (TibetanMachineWeb.hasGlyph(tibCluster)) {
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph(tibCluster));
|
|
|
|
for (int k=i+1; k<length; k++)
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
|
|
|
|
|
|
|
|
return glyphs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (checkSanskrit) {
|
|
|
|
sanCluster = sanBuffer.toString();
|
|
|
|
|
|
|
|
if (TibetanMachineWeb.hasGlyph(sanCluster)) {
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph(sanCluster));
|
|
|
|
for (int k=i+1; k<length; k++)
|
|
|
|
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
|
|
|
|
|
|
|
|
return glyphs;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Finds the first meaningful element to occur within a string of Extended Wylie.
|
|
|
|
* This could be a character, a vowel,
|
|
|
|
* punctuation, or formatting. For example, passed the string 'tshapo',
|
|
|
|
* this method will return 'tsh'.
|
|
|
|
* @param wylie the String of wylie you want to scan
|
|
|
|
* @return the next meaningful subpart of this string, or null if
|
|
|
|
* no meaningful subpart can be found (for example 'x' has no equivalent
|
|
|
|
* in Extended Wylie)
|
|
|
|
*/
|
|
|
|
public static String getNext(String wylie) {
|
|
|
|
boolean hasThereBeenValidity = false;
|
|
|
|
boolean isThereValidity = false;
|
|
|
|
|
|
|
|
String s;
|
|
|
|
int i;
|
|
|
|
int offset = 0;
|
|
|
|
|
|
|
|
char c = wylie.charAt(offset);
|
|
|
|
int k = (int)c;
|
|
|
|
|
|
|
|
if (k < 32) //return null if character is just formatting
|
|
|
|
return String.valueOf(c);
|
|
|
|
|
|
|
|
if (c == TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY)
|
|
|
|
return String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
|
|
|
|
if (c == TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY)
|
|
|
|
return String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY);
|
|
|
|
|
|
|
|
for (i=offset+1; i<wylie.length()+1; i++) {
|
|
|
|
s = wylie.substring(offset, i);
|
|
|
|
|
|
|
|
if (!isThereValidity) {
|
|
|
|
if (TibetanMachineWeb.isWyliePunc(s) || TibetanMachineWeb.isWylieVowel(s) || TibetanMachineWeb.isWylieChar(s)) {
|
|
|
|
isThereValidity = true;
|
|
|
|
hasThereBeenValidity = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (!TibetanMachineWeb.isWyliePunc(s) && !TibetanMachineWeb.isWylieVowel(s) && !TibetanMachineWeb.isWylieChar(s)) {
|
|
|
|
isThereValidity = false;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!hasThereBeenValidity)
|
|
|
|
s = null;
|
|
|
|
|
|
|
|
else {
|
|
|
|
if (isThereValidity) //the whole text region is valid
|
|
|
|
s = wylie.substring(offset, wylie.length());
|
|
|
|
|
|
|
|
else //the loop was broken out of
|
|
|
|
s = wylie.substring(offset, i-1);
|
|
|
|
}
|
|
|
|
|
|
|
|
return s;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Converts a string of Extended Wylie into {@link DuffData DuffData}.
|
|
|
|
* @param wylie the Wylie you want to convert
|
|
|
|
* @return an array of TibetanMachineWeb data
|
|
|
|
* corresponding to the Wylie text
|
|
|
|
* @throws InvalidWylieException if the Wylie is deemed invalid,
|
|
|
|
* i.e. if it does not conform to the Extended Wylie standard
|
|
|
|
*/
|
|
|
|
public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException {
|
|
|
|
List chars = new ArrayList();
|
|
|
|
DuffCode dc;
|
|
|
|
int start = 0;
|
|
|
|
boolean isSanskrit = false;
|
|
|
|
boolean wasLastSanskritStackingKey = false;
|
|
|
|
LinkedList glyphs = new LinkedList();
|
|
|
|
|
|
|
|
while (start < wylie.length()) {
|
|
|
|
String next = getNext(wylie.substring(start));
|
|
|
|
|
|
|
|
if (next == null) {
|
|
|
|
if (!chars.isEmpty()) {
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
chars.clear();
|
|
|
|
isSanskrit = false;
|
|
|
|
}
|
|
|
|
else { //could not convert - throw exception
|
|
|
|
if (start+5 < wylie.length())
|
2003-03-30 02:31:16 +00:00
|
|
|
System.out.println("Bad wylie: "+wylie.substring(start,5)); // FIXME: we're printing to stdout!
|
2002-11-02 03:38:59 +00:00
|
|
|
else
|
|
|
|
System.out.println("Bad wylie: "+wylie.substring(start));
|
|
|
|
throw new InvalidWylieException(wylie, start);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (TibetanMachineWeb.isWyliePunc(next)) {
|
|
|
|
if (!chars.isEmpty())
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
|
|
|
|
chars.clear();
|
|
|
|
|
|
|
|
if (next.equals(String.valueOf(TibetanMachineWeb.BINDU))) {
|
|
|
|
if (glyphs.isEmpty())
|
|
|
|
dc = null;
|
|
|
|
else
|
|
|
|
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
|
|
|
|
|
|
|
|
glyphs.addAll(getBindu(dc));
|
|
|
|
}
|
|
|
|
|
|
|
|
else {
|
|
|
|
dc = TibetanMachineWeb.getGlyph(next);
|
|
|
|
glyphs.add(dc);
|
|
|
|
}
|
|
|
|
|
|
|
|
isSanskrit = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (TibetanMachineWeb.isWylieVowel(next)) {
|
|
|
|
if (!chars.isEmpty()) {
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
|
|
|
|
glyphs.addAll(getVowel(dc, next));
|
|
|
|
chars.clear();
|
|
|
|
}
|
|
|
|
else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel
|
|
|
|
int size = glyphs.size();
|
|
|
|
|
|
|
|
vowel_block: {
|
|
|
|
if (size > 1) {
|
|
|
|
dc = (DuffCode)glyphs.get(glyphs.size()-1);
|
|
|
|
if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc))) {
|
|
|
|
DuffCode dc_2 = (DuffCode)glyphs.removeLast();
|
|
|
|
DuffCode dc_1 = (DuffCode)glyphs.removeLast();
|
|
|
|
glyphs.addAll(getVowel(dc_1, dc_2, next));
|
|
|
|
break vowel_block;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN);
|
|
|
|
dc = dc_array[TibetanMachineWeb.TMW];
|
|
|
|
glyphs.addAll(getVowel(dc, next));
|
|
|
|
}
|
|
|
|
|
|
|
|
chars.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
isSanskrit = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (TibetanMachineWeb.isWylieChar(next)) {
|
|
|
|
if (!isSanskrit) //add char to list - it is not sanskrit
|
|
|
|
chars.add(next);
|
|
|
|
|
|
|
|
else if (wasLastSanskritStackingKey) { //add char to list - it is still part of sanskrit stack
|
|
|
|
chars.add(next);
|
|
|
|
wasLastSanskritStackingKey = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
else { //char is no longer part of sanskrit stack, therefore compute and add previous stack
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
chars.clear();
|
|
|
|
chars.add(next);
|
|
|
|
isSanskrit = false;
|
|
|
|
wasLastSanskritStackingKey = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY))) {
|
|
|
|
if (!chars.isEmpty())
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
|
|
|
|
chars.clear();
|
|
|
|
isSanskrit = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY))) {
|
|
|
|
if (!isSanskrit) { //begin sanskrit stack
|
|
|
|
switch (chars.size()) {
|
|
|
|
case 0:
|
|
|
|
break; //'+' is not "pre-stacking" key
|
|
|
|
|
|
|
|
case 1:
|
|
|
|
isSanskrit = true;
|
|
|
|
wasLastSanskritStackingKey = true;
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
String top_char = (String)chars.get(chars.size()-1);
|
|
|
|
chars.remove(chars.size()-1);
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
chars.clear();
|
|
|
|
chars.add(top_char);
|
|
|
|
isSanskrit = true;
|
|
|
|
wasLastSanskritStackingKey = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
else if (TibetanMachineWeb.isFormatting(next.charAt(0))) {
|
|
|
|
if (!chars.isEmpty())
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
|
|
|
|
dc = new DuffCode(1,next.charAt(0));
|
|
|
|
glyphs.add(dc);
|
|
|
|
chars.clear();
|
|
|
|
isSanskrit = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (next != null)
|
|
|
|
start += next.length();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!chars.isEmpty()) {
|
|
|
|
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
|
|
|
|
chars.clear();
|
|
|
|
}
|
|
|
|
|
|
|
|
DuffData[] dd = convertGlyphs(glyphs);
|
|
|
|
return dd;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the bindu sequence for a given context.
|
|
|
|
* In the TibetanMachineWeb fonts, bindu (anusvara) is realized
|
|
|
|
* differently depending on which vowel it attaches to. Although
|
|
|
|
* the default bindu glyph is affixed to consonants and subscript vowels,
|
|
|
|
* for superscript vowels (i, e, o, etc), there is a single glyph
|
|
|
|
* which merges the bindu and that vowel together. When you pass this
|
|
|
|
* method a glyph context, it will return a List of glyphs which
|
|
|
|
* will either consist of the original glyph followed by the default
|
|
|
|
* bindu glyph, or a composite vowel+bindu glyph.
|
|
|
|
* Note that there is only one glyph in the context. This means that
|
|
|
|
* bindus will not affix properly if superscript vowels are allowed to directly
|
|
|
|
* precede subscript vowels (e.g. pou).
|
|
|
|
* @param dc the DuffCode of the glyph you
|
|
|
|
* want to attach a bindu to
|
|
|
|
* @return a List of DuffCode glyphs that include the
|
|
|
|
* original dc, as well as a bindu
|
|
|
|
*/
|
|
|
|
public static List getBindu(DuffCode dc) {
|
|
|
|
List bindus = new ArrayList();
|
|
|
|
|
|
|
|
if (null == dc) {
|
|
|
|
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU)));
|
|
|
|
return bindus;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) {
|
|
|
|
bindus.add(dc);
|
|
|
|
bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU)));
|
|
|
|
return bindus;
|
|
|
|
}
|
|
|
|
|
|
|
|
bindus.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc));
|
|
|
|
return bindus;
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the vowel sequence for a given vowel in a given context.
|
|
|
|
* Given a context, this method affixes a vowel and returns the
|
|
|
|
* context plus the vowel. Generally, it is enough to provide just
|
|
|
|
* one glyph for context.
|
|
|
|
* @param context the glyph preceding the vowel you want to affix
|
|
|
|
* @param vowel the vowel you want to affix, in Wylie
|
|
|
|
* @return a List of glyphs equal to the vowel in context
|
|
|
|
*/
|
|
|
|
public static List getVowel(DuffCode context, String vowel) {
|
|
|
|
return getVowel(null, context, vowel);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the vowel sequence for a given vowel in a given context.
|
|
|
|
* Given a context, this method affixes a vowel and returns the context plus the vowel.
|
|
|
|
* Since the choice of vowel glyph depends on the consonant to which it is attached,
|
|
|
|
* generally it is enough to provide just the immediately preceding context. However,
|
|
|
|
* in some cases, double vowels are allowed - for example 'buo'. To find the correct
|
|
|
|
* glyph for 'o', we need 'b' in this case, not 'u'. Note also that some Extended
|
|
|
|
* Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For example,
|
|
|
|
* the vowel I consists of both an achung and a reverse gigu. All required glyphs
|
|
|
|
* are part of the returned List.
|
|
|
|
* @param context_1 the glyph occurring two glyphs before the vowel you want to affix
|
|
|
|
* @param context_2 the glyph immediately before the vowel you want to affix
|
|
|
|
* @param vowel the vowel you want to affix, in Wylie
|
|
|
|
* @return a List of glyphs equal to the vowel in context
|
|
|
|
*/
|
|
|
|
|
|
|
|
public static List getVowel(DuffCode context_1, DuffCode context_2, String vowel) {
|
|
|
|
List vowels = new ArrayList();
|
|
|
|
|
|
|
|
//this vowel doesn't correspond to a glyph -
|
|
|
|
//so you just return the original context
|
|
|
|
|
|
|
|
if ( vowel.equals(TibetanMachineWeb.WYLIE_aVOWEL) ||
|
|
|
|
TibetanMachineWeb.isTopVowel(context_2)) {
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
//first, the three easiest cases: ai, au, and <i
|
|
|
|
//these vowels have one invariant form - therefore,
|
|
|
|
//dc_context is just returned along with that form
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.ai_VOWEL)) {
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ai_VOWEL);
|
|
|
|
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.au_VOWEL)) {
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.au_VOWEL);
|
|
|
|
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.reverse_i_VOWEL)) {
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
|
|
|
|
if (!TibetanMachineWeb.isTopVowel(context_2)) {
|
|
|
|
DuffCode[] dc_v = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.reverse_i_VOWEL);
|
|
|
|
vowels.add(dc_v[TibetanMachineWeb.TMW]);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
//second, the vowels i, e, and o
|
|
|
|
//these vowels have many different glyphs each,
|
|
|
|
//whose correct selection depends on the
|
|
|
|
//preceding context. therefore, dc_context is
|
|
|
|
//returned along with the vowel appropriate to
|
|
|
|
//that context
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.i_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
|
|
|
if (null == dc_v && null != context_1) {
|
|
|
|
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
|
|
|
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
|
|
|
|
if (null != dc_v)
|
|
|
|
vowels.add(dc_v);
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.e_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
|
|
|
if (null == dc_v && null != context_1) {
|
|
|
|
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
|
|
|
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_e);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
|
|
|
|
if (null != dc_v)
|
|
|
|
vowels.add(dc_v);
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.o_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
|
|
|
if (null == dc_v && null != context_1) {
|
|
|
|
hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_1);
|
|
|
|
dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_o);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (context_1 != null)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
vowels.add(context_2);
|
|
|
|
|
|
|
|
if (null != dc_v)
|
|
|
|
vowels.add(dc_v);
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
//next come the vowels u, A, and U
|
|
|
|
//these three vowels are grouped together because they all
|
|
|
|
//can cause the preceding context to change. in particular,
|
|
|
|
//both u and A cannot be affixed to ordinary k or g, but
|
|
|
|
//rather the shortened versions of k and g - therefore,
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.u_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
|
|
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_u);
|
|
|
|
|
|
|
|
if (null != context_1)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
if (null == halfHeight)
|
|
|
|
vowels.add(context_2);
|
|
|
|
else
|
|
|
|
vowels.add(halfHeight);
|
|
|
|
|
|
|
|
if (null != dc_v)
|
|
|
|
vowels.add(dc_v);
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.A_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
|
|
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
|
|
|
|
|
|
|
if (null != context_1)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
if (null == halfHeight)
|
|
|
|
vowels.add(context_2);
|
|
|
|
else
|
|
|
|
vowels.add(halfHeight);
|
|
|
|
|
|
|
|
if (null != dc_v)
|
|
|
|
|
|
|
|
vowels.add(dc_v);
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.U_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
|
|
|
DuffCode dc_v = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_U);
|
|
|
|
|
|
|
|
if (null != context_1)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
if (null == halfHeight)
|
|
|
|
vowels.add(context_2);
|
|
|
|
else
|
|
|
|
vowels.add(halfHeight);
|
|
|
|
|
|
|
|
if (null != dc_v && !TibetanMachineWeb.isTopVowel(context_2))
|
|
|
|
vowels.add(dc_v);
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
//finally, the vowels I and <I
|
|
|
|
//these vowels are unique in that they both
|
|
|
|
//require a change from the previous character,
|
|
|
|
//and consist of two glyphs themselves
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.I_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
|
|
|
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
|
|
|
DuffCode dc_v_sup = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_i);
|
|
|
|
|
|
|
|
if (null != context_1)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
if (null == halfHeight)
|
|
|
|
vowels.add(context_2);
|
|
|
|
else
|
|
|
|
vowels.add(halfHeight);
|
|
|
|
|
|
|
|
if (null != dc_v_sub && null != dc_v_sup) {
|
|
|
|
vowels.add(dc_v_sub);
|
|
|
|
vowels.add(dc_v_sup);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (vowel.equals(TibetanMachineWeb.reverse_I_VOWEL)) {
|
|
|
|
String hashKey_context = TibetanMachineWeb.getHashKeyForGlyph(context_2);
|
|
|
|
DuffCode halfHeight = TibetanMachineWeb.getHalfHeightGlyph(hashKey_context);
|
|
|
|
DuffCode dc_v_sub = TibetanMachineWeb.getVowel(hashKey_context, TibetanMachineWeb.VOWEL_A);
|
|
|
|
DuffCode[] tv_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.reverse_i_VOWEL);
|
|
|
|
DuffCode dc_v_sup = tv_array[TibetanMachineWeb.TMW];
|
|
|
|
|
|
|
|
if (null != context_1)
|
|
|
|
vowels.add(context_1);
|
|
|
|
|
|
|
|
if (null == halfHeight)
|
|
|
|
vowels.add(context_2);
|
|
|
|
else
|
|
|
|
vowels.add(halfHeight);
|
|
|
|
|
|
|
|
if (null != dc_v_sub && null != dc_v_sup) {
|
|
|
|
vowels.add(dc_v_sub);
|
|
|
|
vowels.add(dc_v_sup);
|
|
|
|
}
|
|
|
|
|
|
|
|
return vowels;
|
|
|
|
}
|
|
|
|
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
/**
|
|
|
|
* True if you want TibetanMachineWeb-to-Extended-Wylie conversion
|
|
|
|
* to produce Wylie that, if typed, will produce the same sequence
|
|
|
|
* of TibetanMachineWeb glyphs. Without it, converting the glyphs
|
|
|
|
* you get from typing jskad, skaska, skaskaska, skaskaskaska,
|
|
|
|
* etc. will not give you Wylie, that, if typed in again, will
|
|
|
|
* produce the original glyphs. Hence, if this is true, then you
|
|
|
|
* get working, end-to-end Wylie for syntactically illegal
|
|
|
|
* sequences of glyphs. */
|
|
|
|
private static final boolean makeIllegalTibetanGoEndToEnd = true;
|
|
|
|
|
|
|
|
|
|
|
|
/** Returns "a", unless wylie is already "a". */
|
|
|
|
private static String aVowelToUseAfter(String wylie) {
|
|
|
|
if (wylie.equals(TibetanMachineWeb.ACHEN))
|
|
|
|
return "";
|
|
|
|
else
|
|
|
|
return TibetanMachineWeb.WYLIE_aVOWEL;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static String unambiguousPostAVowelWylie(String wylie1,
|
|
|
|
String wylie2) {
|
|
|
|
String disambiguator = "";
|
|
|
|
// type "lard" vs. "lar.d", and you'll see the need for this
|
|
|
|
// disambiguation of suffix and postsuffix. sa doesn't take
|
|
|
|
// any head letters, so only da needs to be considered.
|
|
|
|
if (TibetanMachineWeb.isWylieTop(wylie1)
|
|
|
|
&& wylie2.equals(/* FIXME: hard-coded */ "d"))
|
|
|
|
disambiguator
|
|
|
|
= new String(new char[] { TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY });
|
|
|
|
return wylie1 + disambiguator + wylie2;
|
|
|
|
}
|
|
|
|
|
2002-11-02 03:38:59 +00:00
|
|
|
/**
|
|
|
|
* Scans a list of glyphs and returns an Extended Wylie string with 'a' inserted.
|
|
|
|
* Passed a list of TibetanMachineWeb glyphs that constitute a partial
|
|
|
|
* syllable that is in need of an 'a' vowel, this method scans the list,
|
|
|
|
* figures out where to put the 'a' vowel, and then returns a string
|
|
|
|
* of Wylie corresponding to this sequence. This method is used
|
|
|
|
* heavily during TibetanMachineWeb to Extended Wylie conversion,
|
|
|
|
* since there is no glyph corresponding to the Extended Wylie 'a' vowel.
|
2003-03-30 02:31:16 +00:00
|
|
|
* @param glyphList a list of TibetanMachineWeb glyphs, i.e. {@link
|
|
|
|
* org.thdl.tib.text.DuffCode DuffCodes}. Pass in an ArrayList if you
|
|
|
|
* care at all for speed.
|
2002-11-02 03:38:59 +00:00
|
|
|
* @return the Wylie string corresponding to this glyph list, with 'a' inserted.
|
|
|
|
*/
|
|
|
|
public static String withA(java.util.List glyphList) {
|
|
|
|
StringBuffer sb = new StringBuffer();
|
|
|
|
int size = glyphList.size();
|
|
|
|
String wylie;
|
2003-03-30 02:31:16 +00:00
|
|
|
String lastWylie = "";
|
2002-11-02 03:38:59 +00:00
|
|
|
|
|
|
|
switch (size) {
|
2003-03-30 02:31:16 +00:00
|
|
|
case 0:
|
|
|
|
return "";
|
|
|
|
|
|
|
|
case 1: //only one glyph: 'a' goes after it
|
|
|
|
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0));
|
|
|
|
sb.append(wylie);
|
|
|
|
sb.append(aVowelToUseAfter(wylie));
|
|
|
|
|
|
|
|
return sb.toString();
|
|
|
|
|
|
|
|
case 2: //two glyphs: 'a' either goes after first or after both
|
|
|
|
lastWylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0));
|
|
|
|
sb.append(lastWylie);
|
|
|
|
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(1));
|
|
|
|
if (TibetanMachineWeb.isWylieRight(wylie)) {
|
|
|
|
sb.append(aVowelToUseAfter(lastWylie));
|
|
|
|
sb.append(wylie);
|
|
|
|
} else {
|
|
|
|
/* handle illegal two-glyph combinations,
|
|
|
|
* e.g., skaska */
|
|
|
|
if (makeIllegalTibetanGoEndToEnd
|
|
|
|
&& !TibetanMachineWeb.isWylieLeft(lastWylie)) {
|
|
|
|
sb.append(aVowelToUseAfter(lastWylie));
|
|
|
|
}
|
|
|
|
|
|
|
|
// FIXME: "g" and "y" should not be hard-coded here.
|
|
|
|
// Instead, TibetanMachineWeb should introduce relevant sets
|
|
|
|
|
|
|
|
if (lastWylie.equals("g") && wylie.equals("y"))
|
|
|
|
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
|
|
|
|
if (!wylie.equals(TibetanMachineWeb.ACHEN)) {
|
|
|
|
sb.append(wylie);
|
|
|
|
sb.append(TibetanMachineWeb.WYLIE_aVOWEL);
|
|
|
|
} else {
|
|
|
|
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
sb.append(wylie);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return sb.toString();
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* Three or more characters: 'a' goes before last two,
|
|
|
|
* between last two, or in final position, unless we have
|
|
|
|
* something like pa'am, in which case the vowel comes
|
|
|
|
* before the first ACHEN. */
|
|
|
|
|
|
|
|
/* First, allow for pa'am, and even pa'am'ang, and
|
|
|
|
* even bskyars'am'ang. 'i, 'o, 'i, 'u, etc. will not
|
|
|
|
* occur because this is a call to withA, so vowels
|
|
|
|
* aren't in the glyphList. We will look at the end
|
|
|
|
* of the glyphList (and no, with an ArrayList, this
|
|
|
|
* is not O(glyphList.size()), it is O(1)) and work
|
|
|
|
* our way backward, building up tailEndWylie as we
|
|
|
|
* go. */
|
|
|
|
{
|
|
|
|
StringBuffer tailEndWylie = null;
|
|
|
|
int effectiveSize = size - 2;
|
|
|
|
while (effectiveSize >= 0
|
|
|
|
&& TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize)).equals(TibetanMachineWeb.ACHUNG)) {
|
|
|
|
if (null == tailEndWylie) tailEndWylie = new StringBuffer();
|
|
|
|
// prepend:
|
|
|
|
tailEndWylie.insert(0,
|
|
|
|
TibetanMachineWeb.ACHUNG
|
|
|
|
+ aVowelToUseAfter(TibetanMachineWeb.ACHUNG)
|
|
|
|
+ TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
|
|
|
|
effectiveSize -= 2;
|
|
|
|
}
|
|
|
|
if (null != tailEndWylie) {
|
|
|
|
return (withA(glyphList.subList(0, effectiveSize + 2))
|
|
|
|
+ tailEndWylie.toString());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (makeIllegalTibetanGoEndToEnd
|
|
|
|
&& (size > 4 // this is too many glyphs to be legal
|
|
|
|
// this is illegal because it doesn't begin
|
|
|
|
// with a prefix:
|
|
|
|
|| (size == 4
|
|
|
|
&& (!TibetanMachineWeb.isWylieLeft(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(0)))
|
|
|
|
// this is illegal because it doesn't have a
|
|
|
|
// suffix in the proper place, e.g. mjskad:
|
|
|
|
|| !TibetanMachineWeb.isWylieRight(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(size - 2)))
|
|
|
|
// this is illegal because it doesn't have a
|
|
|
|
// postsuffix in the proper place,
|
|
|
|
// e.g. 'lan.g, which would otherwise become
|
|
|
|
// 'lang (with nga, not na and then ga):
|
|
|
|
|| !TibetanMachineWeb.isWylieFarRight(TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(size - 1))))))) {
|
|
|
|
for (int i = 0; i < size; i++) {
|
|
|
|
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
|
|
|
if ((lastWylie.equals("g") && wylie.equals("y"))
|
|
|
|
|| (i != 0 && wylie.equals(TibetanMachineWeb.ACHEN)))
|
|
|
|
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
|
|
|
|
sb.append(wylie + aVowelToUseAfter(wylie));
|
|
|
|
lastWylie = wylie;
|
|
|
|
}
|
|
|
|
return sb.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Else, chew up all the glyphs except for the last two. Then decide. */
|
|
|
|
int i = 0;
|
|
|
|
while (i+2 < size) {
|
|
|
|
wylie = TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
|
|
|
if ((lastWylie.equals("g") && wylie.equals("y"))
|
|
|
|
|| (i != 0 && wylie.equals(TibetanMachineWeb.ACHEN)))
|
|
|
|
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
|
|
|
|
sb.append(wylie);
|
|
|
|
lastWylie = wylie;
|
|
|
|
i++;
|
|
|
|
}
|
|
|
|
|
|
|
|
String wylie1
|
|
|
|
= TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i));
|
|
|
|
String wylie2
|
|
|
|
= TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(i + 1));
|
|
|
|
|
|
|
|
if (size == 3) {
|
|
|
|
String wylie0 = lastWylie;
|
|
|
|
// Let's see if wylie0+wylie1+wylie2 is ambiguous
|
|
|
|
// -- if wylie0 could be a prefix and if wylie1
|
|
|
|
// could be a suffix, and if wylie2 is "s". If
|
|
|
|
// it's ambigous, let's look up
|
|
|
|
// wylie0+wylie1+wylie2 in our magic table.
|
|
|
|
// Otherwise, see if we have a prefix, and if we
|
|
|
|
// do, the "a" vowel comes after wylie1. Else the
|
|
|
|
// "a" vowel comes after wylie0.
|
|
|
|
if (TibetanMachineWeb.isWylieLeft(wylie0)) {
|
|
|
|
/* is it ambiguous? */
|
|
|
|
if (TibetanMachineWeb.isWylieRight(wylie1)
|
|
|
|
&& TibetanMachineWeb.SA.equals(wylie2)) {
|
|
|
|
/* Yes, this is ambiguous. How do we handle it? See this from Andres:
|
|
|
|
|
|
|
|
I'm posting this upon David Chandler's request. According to Lobsang
|
|
|
|
Thonden in Modern Tibetan Grammar Language (page 42), with regards to
|
|
|
|
identifying the root letter in 3 lettered words there are only 23
|
|
|
|
ambiguous cases. He writes:
|
|
|
|
|
|
|
|
If the last letter is 'sa' and the first two letters are affixes, then
|
|
|
|
the SECOND ONE is the root letter in the following 9 WORDS ONLY:
|
|
|
|
|
|
|
|
gdas gnas gsas dgas dmas bdas mdas 'gas 'das
|
|
|
|
|
|
|
|
And the FIRST is the root letter in the following 14 WORDS ONLY:
|
|
|
|
|
|
|
|
rags lags nags bags bangs gangs rangs langs nangs sangs
|
|
|
|
babs rabs rams nams
|
|
|
|
|
|
|
|
As I mentioned before, I think that the best solution for now is to
|
|
|
|
hard-wire these cases. Even if the list is not exhaustive, at least
|
|
|
|
we'll have most cases covered.
|
|
|
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
/* FIXME: these constants are hard-wired here,
|
|
|
|
* rather than in TibetanMachineWeb, because
|
|
|
|
* I'm lazy. */
|
|
|
|
if ((wylie0.equals("g") && (wylie1.equals("d") || wylie1.equals("n") || wylie1.equals("s")))
|
|
|
|
|| (wylie0.equals("d") && (wylie1.equals("g") || wylie1.equals("m")))
|
|
|
|
|| (wylie0.equals("b") && wylie1.equals("d"))
|
|
|
|
|| (wylie0.equals("m") && wylie1.equals("d"))
|
|
|
|
|| (wylie0.equals("'") && (wylie1.equals("g") || wylie1.equals("d")))) {
|
|
|
|
sb.append(wylie1
|
|
|
|
+ aVowelToUseAfter(wylie1)
|
|
|
|
+ wylie2);
|
|
|
|
} else {
|
|
|
|
sb.append(aVowelToUseAfter(wylie0)
|
|
|
|
+ unambiguousPostAVowelWylie(wylie1,
|
|
|
|
wylie2));
|
|
|
|
}
|
|
|
|
|
|
|
|
// DLC FIXME: what about ambiguity between
|
|
|
|
// wa-zur and wa? dwa vs. d.wa, e.g.?
|
|
|
|
|
|
|
|
// DLC FIXME: disambiguators are needed for
|
|
|
|
// this case too, as b.lag vs. blag
|
|
|
|
// illustrates. Use something based on this,
|
|
|
|
// from LegalTshegBar.java:
|
|
|
|
//
|
|
|
|
// boolean disambiguatorNeeded = false;
|
|
|
|
// char prefix = getPrefix();
|
|
|
|
// sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
|
|
|
|
// if (!hasHeadLetter()) {
|
|
|
|
// if (EWC_ya == rootLetter) {
|
|
|
|
// if (isConsonantThatTakesYaBtags(prefix))
|
|
|
|
// disambiguatorNeeded = true;
|
|
|
|
// } else if (EWC_ra == rootLetter) {
|
|
|
|
// if (isConsonantThatTakesRaBtags(prefix))
|
|
|
|
// disambiguatorNeeded = true;
|
|
|
|
// } else if (EWC_la == rootLetter) {
|
|
|
|
// if (isConsonantThatTakesLaBtags(prefix))
|
|
|
|
// disambiguatorNeeded = true;
|
|
|
|
// } else if (EWC_wa == rootLetter) {
|
|
|
|
// if (isConsonantThatTakesWaZur(prefix))
|
|
|
|
// disambiguatorNeeded = true;
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
// if (disambiguatorNeeded)
|
|
|
|
// sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
|
|
|
|
|
|
|
|
} else {
|
|
|
|
/* no ambiguity. the "a" vowel comes after
|
|
|
|
* wylie1. */
|
|
|
|
sb.append(wylie1
|
|
|
|
+ aVowelToUseAfter(wylie1)
|
|
|
|
+ wylie2);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (makeIllegalTibetanGoEndToEnd
|
|
|
|
&& !(TibetanMachineWeb.isWylieRight(wylie1)
|
|
|
|
&& TibetanMachineWeb.isWylieFarRight(wylie2))) {
|
|
|
|
/* handle skaskaska, e.g. */
|
|
|
|
sb.append(aVowelToUseAfter(wylie0)
|
|
|
|
+ wylie1
|
|
|
|
+ aVowelToUseAfter(wylie1)
|
|
|
|
+ wylie2
|
|
|
|
+ aVowelToUseAfter(wylie2));
|
|
|
|
} else {
|
|
|
|
/* no ambiguity. the "a" vowel comes after
|
|
|
|
* wylie0. */
|
|
|
|
sb.append(aVowelToUseAfter(wylie0)
|
|
|
|
+ unambiguousPostAVowelWylie(wylie1,
|
|
|
|
wylie2));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/* If size==4, then we assume this is legal. If
|
|
|
|
* size==5, anything will do! So assume we have a
|
|
|
|
* prefix, a root letter, a suffix, and a postsuffix.
|
|
|
|
* The "a" vowel comes after the root letter. */
|
|
|
|
sb.append(aVowelToUseAfter(lastWylie)
|
|
|
|
+ unambiguousPostAVowelWylie(wylie1,
|
|
|
|
wylie2));
|
|
|
|
}
|
|
|
|
return sb.toString();
|
|
|
|
}
|
2002-11-02 03:38:59 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Gets the Extended Wylie for a list of glyphs.
|
|
|
|
* Passed a list of TibetanMachineWeb glyphs that constitute a partial
|
|
|
|
* or complete syllable, this method scans the list, and then returns a
|
|
|
|
* string of Wylie corresponding to this sequence. No 'a' vowel is
|
|
|
|
* inserted because it is assumed that the glyph list already contains
|
|
|
|
* some other vowel. If the glyph list does not already contain a vowel,
|
|
|
|
* then this method should not be called.
|
|
|
|
*
|
2003-03-30 02:31:16 +00:00
|
|
|
* @param glyphList a list of TibetanMachineWeb glyphs, i.e. {@link org.thdl.tib.text.DuffCode DuffCodes}
|
2002-11-02 03:38:59 +00:00
|
|
|
* @return the Wylie string corresponding to this glyph list
|
|
|
|
*/
|
2003-03-30 02:31:16 +00:00
|
|
|
public static String withoutA(java.util.ArrayList glyphList) {
|
2002-11-02 03:38:59 +00:00
|
|
|
StringBuffer sb = new StringBuffer();
|
|
|
|
Iterator iter = glyphList.iterator();
|
|
|
|
DuffCode dc;
|
|
|
|
String currWylie;
|
|
|
|
String lastWylie = new String();
|
|
|
|
|
|
|
|
while (iter.hasNext()) {
|
|
|
|
dc = (DuffCode)iter.next();
|
|
|
|
currWylie = TibetanMachineWeb.getWylieForGlyph(dc);
|
|
|
|
|
|
|
|
//note: "g" and "y" should not be hard-coded
|
|
|
|
// instead, TibetanMachineWeb should introduce relevant sets
|
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
if ((lastWylie.equals("g") && currWylie.equals("y"))
|
|
|
|
|| (!lastWylie.equals("")
|
|
|
|
&& currWylie.equals(TibetanMachineWeb.ACHEN)))
|
|
|
|
sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
2002-11-02 03:38:59 +00:00
|
|
|
|
|
|
|
sb.append(currWylie);
|
|
|
|
|
|
|
|
lastWylie = currWylie;
|
|
|
|
}
|
|
|
|
|
|
|
|
return sb.toString();
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
2003-03-30 02:31:16 +00:00
|
|
|
* Gets the Extended Wylie for a sequence of glyphs.
|
2002-11-02 03:38:59 +00:00
|
|
|
* @param dcs an array of glyphs
|
|
|
|
* @return the Extended Wylie corresponding to these glyphs
|
|
|
|
*/
|
|
|
|
public static String getWylie(DuffCode[] dcs) {
|
|
|
|
if (dcs.length == 0)
|
|
|
|
return null;
|
|
|
|
|
|
|
|
char ch;
|
|
|
|
String wylie;
|
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
ArrayList glyphList = new ArrayList();
|
2002-11-02 03:38:59 +00:00
|
|
|
boolean needsVowel = true;
|
|
|
|
boolean isLastVowel = false;
|
|
|
|
int start = 0;
|
|
|
|
StringBuffer wylieBuffer = new StringBuffer();
|
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
for (int i=start; i<dcs.length; i++) {
|
|
|
|
ch = dcs[i].character;
|
|
|
|
int k = dcs[i].charNum;
|
|
|
|
// int fontNum = dcs[i].fontNum;
|
|
|
|
|
|
|
|
if (k < 32) {
|
|
|
|
if (wylieBuffer.length() > 0 || !glyphList.isEmpty()) {
|
|
|
|
String thisPart;
|
|
|
|
if (needsVowel)
|
|
|
|
thisPart = withA(glyphList);
|
|
|
|
else
|
|
|
|
thisPart = withoutA(glyphList);
|
|
|
|
wylieBuffer.append(thisPart);
|
|
|
|
|
|
|
|
glyphList.clear();
|
|
|
|
needsVowel = true;
|
|
|
|
isLastVowel = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
wylieBuffer.append(ch);
|
|
|
|
} else {
|
|
|
|
wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i]);
|
|
|
|
|
|
|
|
boolean containsBindu = false;
|
|
|
|
if (wylie.length() > 1 && wylie.charAt(wylie.length()-1) == TibetanMachineWeb.BINDU) {
|
|
|
|
char[] cArray = wylie.toCharArray();
|
|
|
|
wylie = new String(cArray, 0, wylie.length()-1);
|
|
|
|
containsBindu = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
process_block: {
|
|
|
|
if (TibetanMachineWeb.isWyliePunc(wylie)) {
|
|
|
|
isLastVowel = false;
|
|
|
|
|
|
|
|
if (glyphList.isEmpty()) {
|
|
|
|
wylieBuffer.append(wylie);
|
|
|
|
} else {
|
|
|
|
String thisPart;
|
|
|
|
if (needsVowel)
|
|
|
|
thisPart = withA(glyphList);
|
|
|
|
else
|
|
|
|
thisPart = withoutA(glyphList);
|
|
|
|
wylieBuffer.append(thisPart);
|
|
|
|
|
|
|
|
wylieBuffer.append(wylie); //append the punctuation
|
|
|
|
|
|
|
|
glyphList.clear();
|
|
|
|
}
|
|
|
|
needsVowel = true; //next consonants are syllable onset, so we are awaiting vowel
|
|
|
|
} else if (TibetanMachineWeb.isWylieChar(wylie)) {
|
2002-11-02 03:38:59 +00:00
|
|
|
//isChar must come before isVowel because ACHEN has priority over WYLIE_aVOWEL
|
2003-03-30 02:31:16 +00:00
|
|
|
isLastVowel = false;
|
|
|
|
glyphList.add(dcs[i]);
|
|
|
|
} else if (TibetanMachineWeb.isWylieVowel(wylie)) {
|
|
|
|
if (isLastVowel) {
|
|
|
|
int len = wylieBuffer.length();
|
|
|
|
int A_len = TibetanMachineWeb.A_VOWEL.length();
|
|
|
|
|
|
|
|
if (wylieBuffer.substring(len-A_len).equals(TibetanMachineWeb.A_VOWEL)) {
|
|
|
|
try {
|
|
|
|
if (wylie.equals(TibetanMachineWeb.i_VOWEL)) {
|
|
|
|
wylieBuffer.delete(len-A_len, len);
|
|
|
|
wylieBuffer.append(TibetanMachineWeb.I_VOWEL);
|
|
|
|
isLastVowel = false;
|
|
|
|
break process_block;
|
|
|
|
} else if (wylie.equals(TibetanMachineWeb.reverse_i_VOWEL)) {
|
|
|
|
wylieBuffer.delete(len-A_len, len);
|
|
|
|
wylieBuffer.append(TibetanMachineWeb.reverse_I_VOWEL);
|
|
|
|
isLastVowel = false;
|
|
|
|
break process_block;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
catch (StringIndexOutOfBoundsException se) {
|
|
|
|
ThdlDebug.noteIffyCode();
|
|
|
|
}
|
|
|
|
|
|
|
|
wylieBuffer.append(wylie); //append current vowel
|
|
|
|
isLastVowel = false;
|
|
|
|
} else
|
|
|
|
wylieBuffer.append(wylie); //append current vowel
|
|
|
|
} else {
|
|
|
|
int glyphCount = glyphList.size();
|
|
|
|
boolean insertDisAmbig = false;
|
|
|
|
|
|
|
|
if (0 != glyphCount) {
|
|
|
|
DuffCode top_dc = (DuffCode)glyphList.get(glyphCount-1);
|
|
|
|
String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc);
|
|
|
|
|
|
|
|
if (top_wylie.equals(TibetanMachineWeb.ACHEN)) {
|
|
|
|
glyphList.remove(glyphCount-1);
|
2002-11-02 03:38:59 +00:00
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
if (glyphCount-1 == 0) {
|
|
|
|
top_dc = null;
|
|
|
|
} else {
|
|
|
|
insertDisAmbig = true;
|
|
|
|
top_dc = (DuffCode)glyphList.get(glyphCount-2);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc).equals(TibetanMachineWeb.ACHUNG)) {
|
|
|
|
String thisPart = withoutA(glyphList);
|
|
|
|
wylieBuffer.append(thisPart); //append consonants in glyphList
|
|
|
|
} else {
|
|
|
|
glyphCount = glyphList.size();
|
|
|
|
glyphList.remove(glyphCount-1);
|
2002-11-02 03:38:59 +00:00
|
|
|
|
2003-03-30 02:31:16 +00:00
|
|
|
if (glyphCount-1 != 0) {
|
|
|
|
String thisPart = withA(glyphList);
|
|
|
|
wylieBuffer.append(thisPart);
|
|
|
|
}
|
|
|
|
|
|
|
|
wylieBuffer.append(TibetanMachineWeb.ACHUNG);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (insertDisAmbig)
|
|
|
|
wylieBuffer.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
|
|
|
|
|
|
|
|
wylieBuffer.append(wylie); //append vowel
|
|
|
|
|
|
|
|
glyphList.clear();
|
|
|
|
isLastVowel = true;
|
|
|
|
needsVowel = false;
|
|
|
|
}
|
|
|
|
} else { //must be a stack
|
|
|
|
isLastVowel = false;
|
|
|
|
glyphList.add(dcs[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (containsBindu) {
|
|
|
|
isLastVowel = false;
|
|
|
|
wylieBuffer.append(withoutA(glyphList));
|
|
|
|
wylieBuffer.append(TibetanMachineWeb.BINDU); //append the bindu
|
|
|
|
glyphList.clear();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
//replace TMW with Wylie
|
|
|
|
|
|
|
|
if (!glyphList.isEmpty()) {
|
|
|
|
String thisPart;
|
|
|
|
if (needsVowel)
|
|
|
|
thisPart = withA(glyphList);
|
|
|
|
else
|
|
|
|
thisPart = withoutA(glyphList);
|
|
|
|
wylieBuffer.append(thisPart);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (wylieBuffer.length() > 0)
|
|
|
|
return wylieBuffer.toString();
|
|
|
|
else
|
|
|
|
return null;
|
2002-11-02 03:38:59 +00:00
|
|
|
}
|
|
|
|
}
|