Untabified -- only whitespace has changed. Use 'cvs diff -wb' to avoid seeing these differences.

This commit is contained in:
dchandler 2003-10-18 18:34:49 +00:00
parent e5534f69ee
commit 47648186b4

View file

@ -62,34 +62,34 @@ public class TibTextUtils implements THDLWylieConstants {
* @return an array of DuffData corresponding to this * @return an array of DuffData corresponding to this
* list of glyphs * list of glyphs
*/ */
public static DuffData[] convertGlyphs(List glyphs) { public static DuffData[] convertGlyphs(List glyphs) {
if (glyphs.size() == 0) if (glyphs.size() == 0)
return null; return null;
List data = new ArrayList(); List data = new ArrayList();
StringBuffer sb = new StringBuffer(); StringBuffer sb = new StringBuffer();
Iterator iter = glyphs.iterator(); Iterator iter = glyphs.iterator();
DuffCode dc = (DuffCode)iter.next(); DuffCode dc = (DuffCode)iter.next();
int lastfont = dc.getFontNum(); int lastfont = dc.getFontNum();
sb.append(dc.getCharacter()); sb.append(dc.getCharacter());
while (iter.hasNext()) { while (iter.hasNext()) {
dc = (DuffCode)iter.next(); dc = (DuffCode)iter.next();
if (dc.getFontNum() == lastfont) if (dc.getFontNum() == lastfont)
sb.append(dc.getCharacter()); sb.append(dc.getCharacter());
else { else {
data.add(new DuffData(sb.toString(), lastfont)); data.add(new DuffData(sb.toString(), lastfont));
lastfont = dc.getFontNum(); lastfont = dc.getFontNum();
sb = new StringBuffer(); sb = new StringBuffer();
sb.append(dc.getCharacter()); sb.append(dc.getCharacter());
} }
} }
data.add(new DuffData(sb.toString(), lastfont)); data.add(new DuffData(sb.toString(), lastfont));
DuffData[] dd = new DuffData[0]; DuffData[] dd = new DuffData[0];
dd = (DuffData[])data.toArray(dd); dd = (DuffData[])data.toArray(dd);
return dd; return dd;
} }
/** /**
* Figures out how to arrange a list of characters into glyphs. For * Figures out how to arrange a list of characters into glyphs. For
@ -113,135 +113,135 @@ public class TibTextUtils implements THDLWylieConstants {
* @param definitelySanskrit should be true if the characters are known * @param definitelySanskrit should be true if the characters are known
* to be Sanskrit and not Tibetan * to be Sanskrit and not Tibetan
*/ */
public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) { public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
StringBuffer tibBuffer, sanBuffer; StringBuffer tibBuffer, sanBuffer;
String tibCluster, sanCluster; String tibCluster, sanCluster;
boolean checkTibetan, checkSanskrit; boolean checkTibetan, checkSanskrit;
if (!(definitelyTibetan || definitelySanskrit)) { if (!(definitelyTibetan || definitelySanskrit)) {
checkTibetan = true; checkTibetan = true;
checkSanskrit = true; checkSanskrit = true;
} }
else { else {
checkTibetan = definitelyTibetan; checkTibetan = definitelyTibetan;
checkSanskrit = definitelySanskrit; checkSanskrit = definitelySanskrit;
} }
int length = chars.size(); int length = chars.size();
List glyphs = new ArrayList(); List glyphs = new ArrayList();
glyphs.clear(); glyphs.clear();
if (areStacksOnRight) { if (areStacksOnRight) {
for (int i=0; i<length; i++) { for (int i=0; i<length; i++) {
tibBuffer = new StringBuffer(); tibBuffer = new StringBuffer();
tibCluster = null; tibCluster = null;
sanBuffer = new StringBuffer(); sanBuffer = new StringBuffer();
sanCluster = null; sanCluster = null;
for (int k=i; k<length; k++) { for (int k=i; k<length; k++) {
String s = (String)chars.get(k); String s = (String)chars.get(k);
if (checkTibetan) if (checkTibetan)
tibBuffer.append(s); tibBuffer.append(s);
if (checkSanskrit) if (checkSanskrit)
sanBuffer.append(s); sanBuffer.append(s);
if (k!=length-1) { if (k!=length-1) {
if (checkTibetan) if (checkTibetan)
tibBuffer.append("-"); tibBuffer.append("-");
if (checkSanskrit) if (checkSanskrit)
sanBuffer.append("+"); sanBuffer.append("+");
} }
} }
if (checkTibetan) { if (checkTibetan) {
tibCluster = tibBuffer.toString(); tibCluster = tibBuffer.toString();
if (TibetanMachineWeb.hasGlyph(tibCluster)) { if (TibetanMachineWeb.hasGlyph(tibCluster)) {
Iterator iter = chars.iterator(); Iterator iter = chars.iterator();
for (int k=0; k<i; k++) //should really check here to make sure glyphs exist FIXME for (int k=0; k<i; k++) //should really check here to make sure glyphs exist FIXME
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next())); glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
glyphs.add(TibetanMachineWeb.getGlyph(tibCluster)); glyphs.add(TibetanMachineWeb.getGlyph(tibCluster));
return glyphs; return glyphs;
} }
} }
if (checkSanskrit) { if (checkSanskrit) {
sanCluster = sanBuffer.toString(); sanCluster = sanBuffer.toString();
if (TibetanMachineWeb.hasGlyph(sanCluster)) { if (TibetanMachineWeb.hasGlyph(sanCluster)) {
Iterator iter = chars.iterator(); Iterator iter = chars.iterator();
for (int k=0; k<i; k++) //should really check here to make sure glyphs exist FIXME for (int k=0; k<i; k++) //should really check here to make sure glyphs exist FIXME
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next())); glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
glyphs.add(TibetanMachineWeb.getGlyph(sanCluster)); glyphs.add(TibetanMachineWeb.getGlyph(sanCluster));
return glyphs; return glyphs;
} }
} }
} }
} }
else { else {
for (int i=length-1; i>-1; i--) { for (int i=length-1; i>-1; i--) {
tibBuffer = new StringBuffer(); tibBuffer = new StringBuffer();
tibCluster = null; tibCluster = null;
sanBuffer = new StringBuffer(); sanBuffer = new StringBuffer();
sanCluster = null; sanCluster = null;
Iterator iter = chars.iterator(); Iterator iter = chars.iterator();
for (int k=0; k<i+1; k++) { for (int k=0; k<i+1; k++) {
String s = (String)iter.next(); String s = (String)iter.next();
if (checkTibetan) if (checkTibetan)
tibBuffer.append(s); tibBuffer.append(s);
if (checkSanskrit) if (checkSanskrit)
sanBuffer.append(s); sanBuffer.append(s);
if (k!=i) { if (k!=i) {
if (checkTibetan) if (checkTibetan)
tibBuffer.append("-"); tibBuffer.append("-");
if (checkSanskrit) if (checkSanskrit)
sanBuffer.append("+"); sanBuffer.append("+");
} }
} }
if (checkTibetan) { if (checkTibetan) {
tibCluster = tibBuffer.toString(); tibCluster = tibBuffer.toString();
if (TibetanMachineWeb.hasGlyph(tibCluster)) { if (TibetanMachineWeb.hasGlyph(tibCluster)) {
glyphs.add(TibetanMachineWeb.getGlyph(tibCluster)); glyphs.add(TibetanMachineWeb.getGlyph(tibCluster));
for (int k=i+1; k<length; k++) for (int k=i+1; k<length; k++)
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next())); glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
return glyphs; return glyphs;
} }
} }
if (checkSanskrit) { if (checkSanskrit) {
sanCluster = sanBuffer.toString(); sanCluster = sanBuffer.toString();
if (TibetanMachineWeb.hasGlyph(sanCluster)) { if (TibetanMachineWeb.hasGlyph(sanCluster)) {
glyphs.add(TibetanMachineWeb.getGlyph(sanCluster)); glyphs.add(TibetanMachineWeb.getGlyph(sanCluster));
for (int k=i+1; k<length; k++) for (int k=i+1; k<length; k++)
glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next())); glyphs.add(TibetanMachineWeb.getGlyph((String)iter.next()));
return glyphs; return glyphs;
} }
} }
} }
} }
return null; return null;
} }
/** /**
* Finds the first meaningful element to occur within a string of * Finds the first meaningful element to occur within a string of
@ -253,56 +253,56 @@ public class TibTextUtils implements THDLWylieConstants {
* no meaningful subpart can be found (for example 'x' has no equivalent * no meaningful subpart can be found (for example 'x' has no equivalent
* in Extended Wylie) * in Extended Wylie)
*/ */
public static String getNext(String wylie) { public static String getNext(String wylie) {
boolean hasThereBeenValidity = false; boolean hasThereBeenValidity = false;
boolean isThereValidity = false; boolean isThereValidity = false;
String s; String s;
int i; int i;
int offset = 0; int offset = 0;
char c = wylie.charAt(offset); char c = wylie.charAt(offset);
int k = (int)c; int k = (int)c;
if (k < 32) //return null if character is just formatting if (k < 32) //return null if character is just formatting
return String.valueOf(c); return String.valueOf(c);
if (c == WYLIE_DISAMBIGUATING_KEY) if (c == WYLIE_DISAMBIGUATING_KEY)
return String.valueOf(WYLIE_DISAMBIGUATING_KEY); return String.valueOf(WYLIE_DISAMBIGUATING_KEY);
if (c == WYLIE_SANSKRIT_STACKING_KEY) if (c == WYLIE_SANSKRIT_STACKING_KEY)
return String.valueOf(WYLIE_SANSKRIT_STACKING_KEY); return String.valueOf(WYLIE_SANSKRIT_STACKING_KEY);
for (i=offset+1; i<wylie.length()+1; i++) { for (i=offset+1; i<wylie.length()+1; i++) {
s = wylie.substring(offset, i); s = wylie.substring(offset, i);
if (!isThereValidity) { if (!isThereValidity) {
if (TibetanMachineWeb.isWyliePunc(s) || TibetanMachineWeb.isWylieVowel(s) || TibetanMachineWeb.isWylieChar(s)) { if (TibetanMachineWeb.isWyliePunc(s) || TibetanMachineWeb.isWylieVowel(s) || TibetanMachineWeb.isWylieChar(s)) {
isThereValidity = true; isThereValidity = true;
hasThereBeenValidity = true; hasThereBeenValidity = true;
} }
} }
else { else {
if (!TibetanMachineWeb.isWyliePunc(s) && !TibetanMachineWeb.isWylieVowel(s) && !TibetanMachineWeb.isWylieChar(s)) { if (!TibetanMachineWeb.isWyliePunc(s) && !TibetanMachineWeb.isWylieVowel(s) && !TibetanMachineWeb.isWylieChar(s)) {
isThereValidity = false; isThereValidity = false;
break; break;
} }
} }
} }
if (!hasThereBeenValidity) if (!hasThereBeenValidity)
s = null; s = null;
else { else {
if (isThereValidity) //the whole text region is valid if (isThereValidity) //the whole text region is valid
s = wylie.substring(offset, wylie.length()); s = wylie.substring(offset, wylie.length());
else //the loop was broken out of else //the loop was broken out of
s = wylie.substring(offset, i-1); s = wylie.substring(offset, i-1);
} }
return s; return s;
} }
/** An array containing one boolean value. Pass this to /** An array containing one boolean value. Pass this to
TibetanMachineWeb.getWylieForGlyph(..) if you don't care if a TibetanMachineWeb.getWylieForGlyph(..) if you don't care if a
@ -318,163 +318,163 @@ public class TibTextUtils implements THDLWylieConstants {
* @throws InvalidWylieException if the Wylie is deemed invalid, * @throws InvalidWylieException if the Wylie is deemed invalid,
* i.e. if it does not conform to the Extended Wylie standard * i.e. if it does not conform to the Extended Wylie standard
*/ */
public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException { public static DuffData[] getTibetanMachineWeb(String wylie) throws InvalidWylieException {
List chars = new ArrayList(); List chars = new ArrayList();
DuffCode dc; DuffCode dc;
int start = 0; int start = 0;
boolean isSanskrit = false; boolean isSanskrit = false;
boolean wasLastSanskritStackingKey = false; boolean wasLastSanskritStackingKey = false;
LinkedList glyphs = new LinkedList(); LinkedList glyphs = new LinkedList();
while (start < wylie.length()) { while (start < wylie.length()) {
String next = getNext(wylie.substring(start)); String next = getNext(wylie.substring(start));
if (next == null) { if (next == null) {
if (!chars.isEmpty()) { if (!chars.isEmpty()) {
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
chars.clear(); chars.clear();
isSanskrit = false; isSanskrit = false;
} }
else { //could not convert - throw exception else { //could not convert - throw exception
// FIXME: we're printing to stdout! // FIXME: we're printing to stdout!
if (start+5 <= wylie.length()) { if (start+5 <= wylie.length()) {
System.out.println("Bad wylie: " System.out.println("Bad wylie: "
+ wylie.substring(start, + wylie.substring(start,
start + 5)); start + 5));
} else { } else {
System.out.println("Bad wylie: "+wylie.substring(start)); System.out.println("Bad wylie: "+wylie.substring(start));
} }
throw new InvalidWylieException(wylie, start); throw new InvalidWylieException(wylie, start);
} }
} }
else if (TibetanMachineWeb.isWyliePunc(next)) { else if (TibetanMachineWeb.isWyliePunc(next)) {
if (!chars.isEmpty()) if (!chars.isEmpty())
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
chars.clear(); chars.clear();
if (next.equals(String.valueOf(BINDU))) { if (next.equals(String.valueOf(BINDU))) {
if (glyphs.isEmpty()) if (glyphs.isEmpty())
dc = null; dc = null;
else else
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
getBindu(glyphs, dc); getBindu(glyphs, dc);
} }
else { else {
dc = TibetanMachineWeb.getGlyph(next); dc = TibetanMachineWeb.getGlyph(next);
glyphs.add(dc); glyphs.add(dc);
} }
isSanskrit = false; isSanskrit = false;
} }
else if (TibetanMachineWeb.isWylieVowel(next)) { else if (TibetanMachineWeb.isWylieVowel(next)) {
if (!chars.isEmpty()) { if (!chars.isEmpty()) {
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation
getVowel(glyphs, dc, next); getVowel(glyphs, dc, next);
chars.clear(); chars.clear();
} }
else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel
int size = glyphs.size(); int size = glyphs.size();
vowel_block: { vowel_block: {
if (size > 1) { if (size > 1) {
dc = (DuffCode)glyphs.get(glyphs.size()-1); dc = (DuffCode)glyphs.get(glyphs.size()-1);
if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc, weDoNotCareIfThereIsCorrespondingWylieOrNot))) { if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc, weDoNotCareIfThereIsCorrespondingWylieOrNot))) {
DuffCode dc_2 = (DuffCode)glyphs.removeLast(); DuffCode dc_2 = (DuffCode)glyphs.removeLast();
DuffCode dc_1 = (DuffCode)glyphs.removeLast(); DuffCode dc_1 = (DuffCode)glyphs.removeLast();
getVowel(glyphs, dc_1, dc_2, next); getVowel(glyphs, dc_1, dc_2, next);
break vowel_block; break vowel_block;
} }
} }
DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(ACHEN); DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(ACHEN);
dc = dc_array[TibetanMachineWeb.TMW]; dc = dc_array[TibetanMachineWeb.TMW];
getVowel(glyphs, dc, next); getVowel(glyphs, dc, next);
} }
chars.clear(); chars.clear();
} }
isSanskrit = false; isSanskrit = false;
} }
else if (TibetanMachineWeb.isWylieChar(next)) { else if (TibetanMachineWeb.isWylieChar(next)) {
if (!isSanskrit) //add char to list - it is not sanskrit if (!isSanskrit) //add char to list - it is not sanskrit
chars.add(next); chars.add(next);
else if (wasLastSanskritStackingKey) { //add char to list - it is still part of sanskrit stack else if (wasLastSanskritStackingKey) { //add char to list - it is still part of sanskrit stack
chars.add(next); chars.add(next);
wasLastSanskritStackingKey = false; wasLastSanskritStackingKey = false;
} }
else { //char is no longer part of sanskrit stack, therefore compute and add previous stack else { //char is no longer part of sanskrit stack, therefore compute and add previous stack
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
chars.clear(); chars.clear();
chars.add(next); chars.add(next);
isSanskrit = false; isSanskrit = false;
wasLastSanskritStackingKey = false; wasLastSanskritStackingKey = false;
} }
} }
else if (next.equals(String.valueOf(WYLIE_DISAMBIGUATING_KEY))) { else if (next.equals(String.valueOf(WYLIE_DISAMBIGUATING_KEY))) {
if (!chars.isEmpty()) if (!chars.isEmpty())
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
chars.clear(); chars.clear();
isSanskrit = false; isSanskrit = false;
} }
else if (next.equals(String.valueOf(WYLIE_SANSKRIT_STACKING_KEY))) { else if (next.equals(String.valueOf(WYLIE_SANSKRIT_STACKING_KEY))) {
if (!isSanskrit) { //begin sanskrit stack if (!isSanskrit) { //begin sanskrit stack
switch (chars.size()) { switch (chars.size()) {
case 0: case 0:
break; //'+' is not "pre-stacking" key break; //'+' is not "pre-stacking" key
case 1: case 1:
isSanskrit = true; isSanskrit = true;
wasLastSanskritStackingKey = true; wasLastSanskritStackingKey = true;
break; break;
default: default:
String top_char = (String)chars.get(chars.size()-1); String top_char = (String)chars.get(chars.size()-1);
chars.remove(chars.size()-1); chars.remove(chars.size()-1);
// DLC PERFORMANCE FIXME: make glyphs a parameter // DLC PERFORMANCE FIXME: make glyphs a parameter
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
chars.clear(); chars.clear();
chars.add(top_char); chars.add(top_char);
isSanskrit = true; isSanskrit = true;
wasLastSanskritStackingKey = true; wasLastSanskritStackingKey = true;
break; break;
} }
} }
} }
else if (TibetanMachineWeb.isFormatting(next.charAt(0))) { else if (TibetanMachineWeb.isFormatting(next.charAt(0))) {
if (!chars.isEmpty()) if (!chars.isEmpty())
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
dc = new DuffCode(1,next.charAt(0)); dc = new DuffCode(1,next.charAt(0));
glyphs.add(dc); glyphs.add(dc);
chars.clear(); chars.clear();
isSanskrit = false; isSanskrit = false;
} }
if (next != null) if (next != null)
start += next.length(); start += next.length();
} }
if (!chars.isEmpty()) { if (!chars.isEmpty()) {
glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit));
chars.clear(); chars.clear();
} }
DuffData[] dd = convertGlyphs(glyphs); DuffData[] dd = convertGlyphs(glyphs);
return dd; return dd;
} }
/** /**
* Gets the bindu sequence for a given context. In the * Gets the bindu sequence for a given context. In the