/* The contents of this file are subject to the THDL Open Community License Version 1.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License on the THDL web site (http://www.thdl.org/). Software distributed under the License is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for the specific terms governing rights and limitations under the License. The Initial Developer of this software is the Tibetan and Himalayan Digital Library (THDL). Portions created by the THDL are Copyright 2001 THDL. All Rights Reserved. Contributor(s): ______________________________________. */ package org.thdl.tib.text; import java.util.*; import javax.swing.*; import javax.swing.text.*; import javax.swing.text.rtf.RTFEditorKit; import java.io.*; import org.thdl.util.ThdlDebug; /** * Provides methods for converting back and forth between Extended * Wylie and TibetanMachineWeb. This class is not instantiable. * *

* The class provides a variety of static methods for converting * back and forth between Extended Wylie and TibetanMachineWeb. The * Wylie can be accessed as a String, while the TibetanMachineWeb can * be exported as Rich Text Format. * * @author Edward Garrett, Tibetan and Himalayan Digital Library */ public class TibTextUtils { /** Do not use this contructor. */ private TibTextUtils() { super(); } /** * Converts a list of glyphs into an array of {@link DuffData DuffData}. * The motivation for this is that most processes - for example using * TibetanMachineWeb in HTML - only need to know what * text to output, and when to change fonts. In general, they don't * need to have an explicit indication for each glyph of the font * for that glyph. * @param glyphs the list of TibetanMachineWeb glyphs * you want to convert * @return an array of DuffData corresponding to this * list of glyphs */ public static DuffData[] convertGlyphs(List glyphs) { if (glyphs.size() == 0) return null; List data = new ArrayList(); StringBuffer sb = new StringBuffer(); Iterator iter = glyphs.iterator(); DuffCode dc = (DuffCode)iter.next(); int lastfont = dc.fontNum; sb.append(dc.character); while (iter.hasNext()) { dc = (DuffCode)iter.next(); if (dc.fontNum == lastfont) sb.append(dc.character); else { data.add(new DuffData(sb.toString(), lastfont)); lastfont = dc.fontNum; sb = new StringBuffer(); sb.append(dc.character); } } data.add(new DuffData(sb.toString(), lastfont)); DuffData[] dd = new DuffData[0]; dd = (DuffData[])data.toArray(dd); return dd; } /** * Figures out how to arrange a list of characters into glyphs. For example, if the user types 'bsgr' * using the Extended Wylie keyboard, this method figures out that this should be represented * as a 'b' glyph followed by a 's-g-r' glyph. If you know that the characters do not * contain Sanskrit stacks, or do not contain Tibetan stacks, then you can specify this * to speed the process up. Otherwise, the method will first check to see if the characters * correspond to any Tibetan stacks, and if not, then it will check for Sanskrit stacks. * @param chars the list of Tibetan characters you want to find glyphs for * @param areStacksOnRight whether stacking should try to maximize from right to left (true) * or from left to right (false). In the Extended Wylie keyboard, you try to stack from * right to left. Thus, the character sequence r-g-r would be stacked as r followed by gr, * rather than rg followed by r. In the Sambhota and TCC keyboards, the stack direction * is reversed. * @param definitelyTibetan should be true if the characters are known to be Tibetan and * not Sanskrit * @param definitelySanskrit should be true if the characters are known to be Sanskrit and * not Tibetan */ public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) { StringBuffer tibBuffer, sanBuffer; String tibCluster, sanCluster; boolean checkTibetan, checkSanskrit; if (!(definitelyTibetan || definitelySanskrit)) { checkTibetan = true; checkSanskrit = true; } else { checkTibetan = definitelyTibetan; checkSanskrit = definitelySanskrit; } int length = chars.size(); List glyphs = new ArrayList(); glyphs.clear(); if (areStacksOnRight) { for (int i=0; i-1; i--) { tibBuffer = new StringBuffer(); tibCluster = null; sanBuffer = new StringBuffer(); sanCluster = null; Iterator iter = chars.iterator(); for (int k=0; k 1) { dc = (DuffCode)glyphs.get(glyphs.size()-1); if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc))) { DuffCode dc_2 = (DuffCode)glyphs.removeLast(); DuffCode dc_1 = (DuffCode)glyphs.removeLast(); glyphs.addAll(getVowel(dc_1, dc_2, next)); break vowel_block; } } DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN); dc = dc_array[TibetanMachineWeb.TMW]; glyphs.addAll(getVowel(dc, next)); } chars.clear(); } isSanskrit = false; } else if (TibetanMachineWeb.isWylieChar(next)) { if (!isSanskrit) //add char to list - it is not sanskrit chars.add(next); else if (wasLastSanskritStackingKey) { //add char to list - it is still part of sanskrit stack chars.add(next); wasLastSanskritStackingKey = false; } else { //char is no longer part of sanskrit stack, therefore compute and add previous stack glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); chars.clear(); chars.add(next); isSanskrit = false; wasLastSanskritStackingKey = false; } } else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY))) { if (!chars.isEmpty()) glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); chars.clear(); isSanskrit = false; } else if (next.equals(String.valueOf(TibetanMachineWeb.WYLIE_SANSKRIT_STACKING_KEY))) { if (!isSanskrit) { //begin sanskrit stack switch (chars.size()) { case 0: break; //'+' is not "pre-stacking" key case 1: isSanskrit = true; wasLastSanskritStackingKey = true; break; default: String top_char = (String)chars.get(chars.size()-1); chars.remove(chars.size()-1); glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); chars.clear(); chars.add(top_char); isSanskrit = true; wasLastSanskritStackingKey = true; break; } } } else if (TibetanMachineWeb.isFormatting(next.charAt(0))) { if (!chars.isEmpty()) glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); dc = new DuffCode(1,next.charAt(0)); glyphs.add(dc); chars.clear(); isSanskrit = false; } if (next != null) start += next.length(); } if (!chars.isEmpty()) { glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); chars.clear(); } DuffData[] dd = convertGlyphs(glyphs); return dd; } /** * Gets the bindu sequence for a given context. * In the TibetanMachineWeb fonts, bindu (anusvara) is realized * differently depending on which vowel it attaches to. Although * the default bindu glyph is affixed to consonants and subscript vowels, * for superscript vowels (i, e, o, etc), there is a single glyph * which merges the bindu and that vowel together. When you pass this * method a glyph context, it will return a List of glyphs which * will either consist of the original glyph followed by the default * bindu glyph, or a composite vowel+bindu glyph. * Note that there is only one glyph in the context. This means that * bindus will not affix properly if superscript vowels are allowed to directly * precede subscript vowels (e.g. pou). * @param dc the DuffCode of the glyph you * want to attach a bindu to * @return a List of DuffCode glyphs that include the * original dc, as well as a bindu */ public static List getBindu(DuffCode dc) { List bindus = new ArrayList(); if (null == dc) { bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU))); return bindus; } if (!TibetanMachineWeb.getBinduMap().containsKey(dc)) { bindus.add(dc); bindus.add(TibetanMachineWeb.getGlyph(String.valueOf(TibetanMachineWeb.BINDU))); return bindus; } bindus.add((DuffCode)TibetanMachineWeb.getBinduMap().get(dc)); return bindus; } /** * Gets the vowel sequence for a given vowel in a given context. * Given a context, this method affixes a vowel and returns the * context plus the vowel. Generally, it is enough to provide just * one glyph for context. * @param context the glyph preceding the vowel you want to affix * @param vowel the vowel you want to affix, in Wylie * @return a List of glyphs equal to the vowel in context */ public static List getVowel(DuffCode context, String vowel) { return getVowel(null, context, vowel); } /** * Gets the vowel sequence for a given vowel in a given context. * Given a context, this method affixes a vowel and returns the context plus the vowel. * Since the choice of vowel glyph depends on the consonant to which it is attached, * generally it is enough to provide just the immediately preceding context. However, * in some cases, double vowels are allowed - for example 'buo'. To find the correct * glyph for 'o', we need 'b' in this case, not 'u'. Note also that some Extended * Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For example, * the vowel I consists of both an achung and a reverse gigu. All required glyphs * are part of the returned List. * @param context_1 the glyph occurring two glyphs before the vowel you want to affix * @param context_2 the glyph immediately before the vowel you want to affix * @param vowel the vowel you want to affix, in Wylie * @return a List of glyphs equal to the vowel in context */ public static List getVowel(DuffCode context_1, DuffCode context_2, String vowel) { List vowels = new ArrayList(); //this vowel doesn't correspond to a glyph - //so you just return the original context if ( vowel.equals(TibetanMachineWeb.WYLIE_aVOWEL) || TibetanMachineWeb.isTopVowel(context_2)) { if (context_1 != null) vowels.add(context_1); vowels.add(context_2); return vowels; } //first, the three easiest cases: ai, au, and 0 || !glyphList.isEmpty()) { if (needsVowel) wylieBuffer.append(withA(glyphList)); else wylieBuffer.append(withoutA(glyphList)); glyphList.clear(); needsVowel = true; isLastVowel = false; } wylieBuffer.append(ch); } else { wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i]); boolean containsBindu = false; if (wylie.length() > 1 && wylie.charAt(wylie.length()-1) == TibetanMachineWeb.BINDU) { char[] cArray = wylie.toCharArray(); wylie = new String(cArray, 0, wylie.length()-1); containsBindu = true; } process_block: { if (TibetanMachineWeb.isWyliePunc(wylie)) { isLastVowel = false; if (glyphList.isEmpty()) wylieBuffer.append(wylie); else { if (needsVowel) wylieBuffer.append(withA(glyphList)); else wylieBuffer.append(withoutA(glyphList)); wylieBuffer.append(wylie); //append the punctuation glyphList.clear(); } needsVowel = true; //next consonants are syllable onset, so we are awaiting vowel } //isChar must come before isVowel because ACHEN has priority over WYLIE_aVOWEL else if (TibetanMachineWeb.isWylieChar(wylie)) { isLastVowel = false; glyphList.add(dcs[i]); } else if (TibetanMachineWeb.isWylieVowel(wylie)) { if (isLastVowel) { int len = wylieBuffer.length(); int A_len = TibetanMachineWeb.A_VOWEL.length(); if (wylieBuffer.substring(len-A_len).equals(TibetanMachineWeb.A_VOWEL)) { try { if (wylie.equals(TibetanMachineWeb.i_VOWEL)) { wylieBuffer.delete(len-A_len, len); wylieBuffer.append(TibetanMachineWeb.I_VOWEL); isLastVowel = false; break process_block; } else if (wylie.equals(TibetanMachineWeb.reverse_i_VOWEL)) { wylieBuffer.delete(len-A_len, len); wylieBuffer.append(TibetanMachineWeb.reverse_I_VOWEL); isLastVowel = false; break process_block; } } catch (StringIndexOutOfBoundsException se) { ThdlDebug.noteIffyCode(); } wylieBuffer.append(wylie); //append current vowel isLastVowel = false; } else wylieBuffer.append(wylie); //append current vowel } else { int glyphCount = glyphList.size(); boolean insertDisAmbig = false; if (0 != glyphCount) { DuffCode top_dc = (DuffCode)glyphList.get(glyphCount-1); String top_wylie = TibetanMachineWeb.getWylieForGlyph(top_dc); if (top_wylie.equals(TibetanMachineWeb.ACHEN)) { glyphList.remove(glyphCount-1); if (glyphCount-1 == 0) top_dc = null; else { insertDisAmbig = true; top_dc = (DuffCode)glyphList.get(glyphCount-2); } } if (top_dc == null || !TibetanMachineWeb.getWylieForGlyph(top_dc).equals(TibetanMachineWeb.ACHUNG)) wylieBuffer.append(withoutA(glyphList)); //append consonants in glyphList else { glyphCount = glyphList.size(); glyphList.remove(glyphCount-1); if (glyphCount-1 != 0) wylieBuffer.append(withA(glyphList)); wylieBuffer.append(TibetanMachineWeb.ACHUNG); } } if (insertDisAmbig) wylieBuffer.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY); wylieBuffer.append(wylie); //append vowel glyphList.clear(); isLastVowel = true; needsVowel = false; } } else { //must be a stack isLastVowel = false; glyphList.add(dcs[i]); } } if (containsBindu) { isLastVowel = false; wylieBuffer.append(withoutA(glyphList)); wylieBuffer.append(TibetanMachineWeb.BINDU); //append the bindu glyphList.clear(); } } } //replace TMW with Wylie if (!glyphList.isEmpty()) { if (needsVowel) wylieBuffer.append(withA(glyphList)); else wylieBuffer.append(withoutA(glyphList)); } if (wylieBuffer.length() > 0) return wylieBuffer.toString(); else return null; } }