diff --git a/source/org/thdl/tib/input/DuffPane.java b/source/org/thdl/tib/input/DuffPane.java index 27e78e8..2e29a0e 100644 --- a/source/org/thdl/tib/input/DuffPane.java +++ b/source/org/thdl/tib/input/DuffPane.java @@ -797,7 +797,8 @@ public class DuffPane extends TibetanPane implements FocusListener { before_vowel.add(dc_1); before_vowel.add(dc_2); - java.util.List after_vowel = TibTextUtils.getVowel(dc_1, dc_2, v); + java.util.List after_vowel = new ArrayList(); + TibTextUtils.getVowel(after_vowel, dc_1, dc_2, v); if (after_vowel.size() >= before_vowel.size()) { setNumberOfGlyphsForLastVowel(after_vowel.size() - before_vowel.size()); @@ -842,7 +843,8 @@ public class DuffPane extends TibetanPane implements FocusListener { private void printAChenWithVowel(String v) { DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(TibetanMachineWeb.ACHEN); DuffCode dc = dc_array[TibetanMachineWeb.TMW]; - java.util.List achenlist = TibTextUtils.getVowel(dc,v); + java.util.List achenlist = new ArrayList(); + TibTextUtils.getVowel(achenlist, dc, v); DuffData[] dd = TibTextUtils.convertGlyphs(achenlist); getTibDoc().insertDuff(caret.getDot(), dd); } diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index a83df5e..bf6b98d 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -375,7 +375,7 @@ public class TibTextUtils implements THDLWylieConstants { if (!chars.isEmpty()) { glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); dc = (DuffCode)glyphs.removeLast(); //LinkedList implementation - glyphs.addAll(getVowel(dc, next)); + getVowel(glyphs, dc, next); chars.clear(); } else { //if previous is punctuation or null, then achen plus vowel - otherwise, previous could be vowel @@ -387,13 +387,13 @@ public class TibTextUtils implements THDLWylieConstants { if (!TibetanMachineWeb.isWyliePunc(TibetanMachineWeb.getWylieForGlyph(dc, 
weDoNotCareIfThereIsCorrespondingWylieOrNot))) { DuffCode dc_2 = (DuffCode)glyphs.removeLast(); DuffCode dc_1 = (DuffCode)glyphs.removeLast(); - glyphs.addAll(getVowel(dc_1, dc_2, next)); + getVowel(glyphs, dc_1, dc_2, next); break vowel_block; } } DuffCode[] dc_array = (DuffCode[])TibetanMachineWeb.getTibHash().get(ACHEN); dc = dc_array[TibetanMachineWeb.TMW]; - glyphs.addAll(getVowel(dc, next)); + getVowel(glyphs, dc, next); } chars.clear(); @@ -442,6 +442,7 @@ public class TibTextUtils implements THDLWylieConstants { default: String top_char = (String)chars.get(chars.size()-1); chars.remove(chars.size()-1); + // DLC PERFORMANCE FIXME: make glyphs a parameter glyphs.addAll(getGlyphs(chars, true, !isSanskrit, isSanskrit)); chars.clear(); chars.add(top_char); @@ -520,39 +521,39 @@ public class TibTextUtils implements THDLWylieConstants { * @param vowel the vowel you want to affix, in Wylie * @return a List of glyphs equal to the vowel in context */ - public static List getVowel(DuffCode context, String vowel) { - return getVowel(null, context, vowel); - } + public static void getVowel(List l, DuffCode context, String vowel) { + getVowel(l, null, context, vowel); + } /** -* Gets the vowel sequence for a given vowel in a given context. -* Given a context, this method affixes a vowel and returns the context plus the vowel. -* Since the choice of vowel glyph depends on the consonant to which it is attached, -* generally it is enough to provide just the immediately preceding context. However, -* in some cases, double vowels are allowed - for example 'buo'. To find the correct -* glyph for 'o', we need 'b' in this case, not 'u'. Note also that some Extended -* Wylie vowels correspond to multiple glyphs in TibetanMachineWeb. For example, -* the vowel I consists of both an achung and a reverse gigu. All required glyphs -* are part of the returned List. 
-* @param context_1 the glyph occurring two glyphs before the vowel you want to affix -* @param context_2 the glyph immediately before the vowel you want to affix -* @param vowel the vowel you want to affix, in Wylie -* @return a List of glyphs equal to the vowel in context -*/ - - public static List getVowel(DuffCode context_1, DuffCode context_2, String vowel) { - List vowels = new ArrayList(); +* Gets the vowel sequence for a given vowel in a given context and +* appends it to l. Given a context, this method affixes a vowel and +* appends the context plus the vowel to l. Since the choice of vowel +* glyph depends on the consonant to which it is attached, generally it +* is enough to provide just the immediately preceding +* context. However, in some cases, double vowels are allowed - for +* example 'buo'. To find the correct glyph for 'o', we need 'b' in +* this case, not 'u'. Note also that some Extended Wylie vowels +* correspond to multiple glyphs in TibetanMachineWeb. For example, the +* vowel I consists of both an achung and a reverse gigu. All required +* glyphs are appended to l. 
+* @param context_1 the glyph occurring two glyphs before the vowel you +* want to affix +* @param context_2 the glyph immediately before the vowel you want to +* affix +* @param vowel the vowel you want to affix, in Wylie */ + public static void getVowel(List l, DuffCode context_1, DuffCode context_2, String vowel) { //this vowel doesn't correspond to a glyph - //so you just return the original context if ( vowel.equals(WYLIE_aVOWEL) || TibetanMachineWeb.isTopVowel(context_2)) { if (context_1 != null) - vowels.add(context_1); + l.add(context_1); - vowels.add(context_2); - return vowels; + l.add(context_2); + return; } //first, the three easiest cases: ai, au, and = 0) - r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_U)); - else { + if (vowel.startsWith("A")) { + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.WYLIE_aVOWEL); + } else if (vowel.indexOf("'U") >= 0) { + TibTextUtils.getVowel(r, preceding, "U"); + } else { if (vowel.indexOf('\'') >= 0) - r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_A)); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.A_VOWEL); if (vowel.indexOf("EE") >= 0) - r.add(TibetanMachineWeb.getGlyph("ai")); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.ai_VOWEL); else if (vowel.indexOf('E') >= 0) - r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_e)); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.e_VOWEL); if (vowel.indexOf("OO") >= 0) - r.add(TibetanMachineWeb.getGlyph("au")); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.au_VOWEL); else if (vowel.indexOf('O') >= 0) - r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_o)); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.o_VOWEL); if (vowel.indexOf('I') >= 0) - r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_i)); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.i_VOWEL); if (vowel.indexOf('U') >= 0) - 
r.add(TibetanMachineWeb.getVowel(hashKey, TibetanMachineWeb.VOWEL_u)); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.u_VOWEL); if (vowel.indexOf('i') >= 0) - r.add(TibetanMachineWeb.getGlyph("-i")); + TibTextUtils.getVowel(r, preceding, THDLWylieConstants.reverse_i_VOWEL); } + if (vowel.indexOf('m') >= 0) r.add(TibetanMachineWeb.getGlyph("M")); if (vowel.indexOf(':') >= 0) r.add(TibetanMachineWeb.getGlyph("H")); + } } diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java index 5f323f8..a8a5acd 100644 --- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java +++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java @@ -778,11 +778,22 @@ public class ACIPTshegBarScanner { // careful, so "KA\r\n" and "GA\n" appear where "KA // \r\n" and "GA \n" should appear. if (('\r' == ch - || '\n' == ch) + || ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r')) && !al.isEmpty() && ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_NON_PUNCTUATION) { - al.add(new ACIPString(" ", - ACIPString.TIBETAN_PUNCTUATION)); + al.add(new ACIPString(" ", ACIPString.TIBETAN_PUNCTUATION)); + } + + // "DANG,\nLHAG" is really "DANG, LHAG". But always? Not if you have "MDO,\n\nKA...". + if (('\r' == ch + || ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r')) + && !al.isEmpty() + && ((ACIPString)al.get(al.size() - 1)).getType() == ACIPString.TIBETAN_PUNCTUATION + && ((ACIPString)al.get(al.size() - 1)).getText().equals(",") + && s.charAt(i-1) == ',' + && (i + (('\r' == ch) ? 2 : 1) < sl + && (s.charAt(i+(('\r' == ch) ? 
2 : 1)) != ch))) { + al.add(new ACIPString(" ", ACIPString.TIBETAN_PUNCTUATION)); } // Don't add in a "\r\n" or "\n" unless there's a diff --git a/source/org/thdl/tib/text/ttt/TPairList.java b/source/org/thdl/tib/text/ttt/TPairList.java index 6549a01..6858734 100644 --- a/source/org/thdl/tib/text/ttt/TPairList.java +++ b/source/org/thdl/tib/text/ttt/TPairList.java @@ -19,6 +19,7 @@ Contributor(s): ______________________________________. package org.thdl.tib.text.ttt; import org.thdl.tib.text.TibetanMachineWeb; +import org.thdl.tib.text.DuffCode; import org.thdl.tib.text.TGCPair; import org.thdl.util.ThdlDebug; @@ -612,6 +613,7 @@ class TPairList { /** Appends the DuffCodes that correspond to this grapheme cluster * to duff. Assumes this is one grapheme cluster. */ void getDuff(ArrayList duff) { + int previousSize = duff.size(); StringBuffer wylieForConsonant = new StringBuffer(); for (int x = 0; x + 1 < size(); x++) { wylieForConsonant.append(get(x).getWylie(false)); @@ -625,8 +627,15 @@ class TPairList { throw new Error("How did this happen?"); } } - duff.add(TibetanMachineWeb.getGlyph(hashKey)); - ACIPRules.getDuffForACIPVowel(duff, hashKey, lastPair.getRight()); + if (lastPair.getRight() == null || "-".equals(lastPair.getRight())) { /* test the vowel string, not the pair object: comparing lastPair itself to "-" could never match */ + duff.add(TibetanMachineWeb.getGlyph(hashKey)); + } else { + ACIPRules.getDuffForACIPVowel(duff, + TibetanMachineWeb.getGlyph(hashKey), + lastPair.getRight()); + } + if (previousSize == duff.size()) + throw new Error("TPairList with no duffs? " + toString()); // DLC FIXME: change to assertion. } } // DLC FIXME: handle 'o' and 'x', e.g. KAo and NYAx. diff --git a/source/org/thdl/tib/text/ttt/TStackList.java b/source/org/thdl/tib/text/ttt/TStackList.java index 05efc2d..b6901e1 100644 --- a/source/org/thdl/tib/text/ttt/TStackList.java +++ b/source/org/thdl/tib/text/ttt/TStackList.java @@ -217,7 +217,7 @@ class TStackList { } return u.toString(); } - /** DLC DOC */ + /** Returns the DuffCodes corresponding to this stack list. 
*/ DuffCode[] getDuff() { ArrayList al = new ArrayList(size()*2); // rough estimate int count = 0;