diff --git a/source/org/thdl/tib/text/DuffCode.java b/source/org/thdl/tib/text/DuffCode.java index dc04e92..4159aa6 100644 --- a/source/org/thdl/tib/text/DuffCode.java +++ b/source/org/thdl/tib/text/DuffCode.java @@ -36,17 +36,17 @@ import org.thdl.util.ThdlDebug; * @author Edward Garrett, Tibetan and Himalayan Digital Library * @author David Chandler */ -public final class DuffCode { +public final /* immutable */ class DuffCode { /** * the font number in which this glyph can be found, from 1 * (TibetanMachineWeb/TibetanMachine) ... to 5 * (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10 * (TibetanMachineWeb9/[Invalid for TM family]). */ - private byte fontNum; + private /* final if the compiler were smarter */ byte fontNum; /** * the character value of this glyph, as an integer (that is, ordinal) */ - private byte charNum; + private /* final if the compiler were smarter */ byte charNum; /** * Called by {@link TibetanMachineWeb} to generate diff --git a/source/org/thdl/tib/text/SizedDuffCode.java b/source/org/thdl/tib/text/SizedDuffCode.java new file mode 100644 index 0000000..0b610e6 --- /dev/null +++ b/source/org/thdl/tib/text/SizedDuffCode.java @@ -0,0 +1,38 @@ +/* +The contents of this file are subject to the THDL Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the THDL web site +(http://www.thdl.org/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is the Tibetan and Himalayan Digital +Library (THDL). Portions created by the THDL are Copyright 2004 THDL. +All Rights Reserved. + +Contributor(s): ______________________________________. 
+*/ + +package org.thdl.tib.text; + +/** +* An immutable representation of a Tibetan glyph of a certain size in +* the TibetanMachineWeb or TibetanMachine families of fonts. +* +*
A SizedDuffCode is a pair of a font size and a {@link +* DuffCode}.
+* +* @author David Chandler */ +final /* immutable */ class SizedDuffCode { + /** the glyph half of this pair; assigned once, in the constructor */ private final DuffCode dc; + /** the font-size half of this pair; assigned once, in the constructor */ private final int fontSize; + /** Creates a pair holding dc and fontSize exactly as given; no validation or copying is performed here. */ public SizedDuffCode(DuffCode dc, int fontSize) { + this.dc = dc; + this.fontSize = fontSize; + } + /** Returns the DuffCode that was given to the constructor. */ public DuffCode getDuffCode() { return dc; } + /** Returns the font size that was given to the constructor. */ public int getFontSize() { return fontSize; } +} diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index f0cee0e..6429990 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -936,13 +936,13 @@ public class TibTextUtils implements THDLWylieConstants { * @param noSuch an array which will not be touched if this is * successful; however, if there is no THDL Extended Wylie/ACIP * corresponding to these glyphs, then noSuch[0] will be set to true -* @return the Extended Wylie/ACIP corresponding to these glyphs, or -* null */ - public static String getTranslit(boolean EWTSNotACIP, - DuffCode[] dcs, - boolean noSuch[]) { +* @return the Extended Wylie/ACIP corresponding to these glyphs (with +* font size info), or null */ + public static TranslitList getTranslit(boolean EWTSNotACIP, + SizedDuffCode[] dcs, + boolean noSuch[]) { StringBuffer warnings = (debug ? 
new StringBuffer() : null); - String ans + TranslitList ans = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings); if (debug && warnings.length() > 0) System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); @@ -985,7 +985,7 @@ public class TibTextUtils implements THDLWylieConstants { int pairType = TGCPair.TYPE_OTHER; for (int i = 0; i < sz; i++) { - DuffCode dc = (DuffCode)glyphList.get(i); + DuffCode dc = ((SizedDuffCode)glyphList.get(i)).getDuffCode(); String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie); boolean buildingUpSanskritNext = false; if ((buildingUpSanskritNext @@ -1314,12 +1314,13 @@ public class TibTextUtils implements THDLWylieConstants { } /** Appends to translitBuffer the EWTS/ACIP for the glyph list - glyphList (which should be an ArrayList for speed). This will - be very user-friendly for "legal tsheg bars" and will be - valid, but possibly ugly (interspersed with disambiguators or - extra vowels, etc.) Wylie/ACIP for other things, such as - Sanskrit transliteration. Updates warnings and noSuch like - the caller does. + glyphList (which should be an ArrayList for speed). The font + size of the transliteration will be fontSize. The + transliteration will be very user-friendly for "legal tsheg + bars" and will be valid, but possibly ugly (interspersed with + disambiguators or extra vowels, etc.) Wylie/ACIP for other + things, such as Sanskrit transliteration. Updates warnings + and noSuch like the caller does.What constitutes a legal, non-punctuation, non-whitespace tsheg bar? The following are the only such:
@@ -1366,7 +1367,10 @@ public class TibTextUtils implements THDLWylieConstants { java.util.List glyphList, boolean noSuch[], StringBuffer warnings, - StringBuffer translitBuffer) { + TranslitList translitBuffer) { + // FIXME: If font size changes within a tsheg-bar, we don't + // handle that. + int fontSize = ((SizedDuffCode)glyphList.get(0)).getFontSize(); TGCList gcs = breakTshegBarIntoGraphemeClusters(glyphList, noSuch); String candidateType = getClassificationOfTshegBar(gcs, warnings, false); @@ -1397,16 +1401,18 @@ public class TibTextUtils implements THDLWylieConstants { // and a.u and a.i, we always do it (see Rule 10 // of the September 1, 2003 draft of EWTS // standard). - translitBuffer.append(WYLIE_DISAMBIGUATING_KEY); + translitBuffer.append(WYLIE_DISAMBIGUATING_KEY, fontSize); } - translitBuffer.append(translit); + translitBuffer.append(translit, fontSize); if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { - translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie), fontSize); } else if (i + 1 < sz) { if (TGCPair.CONSONANTAL_WITH_VOWEL != cls && TGCPair.SANSKRIT_WITH_VOWEL != cls) - translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-'); + translitBuffer.append(EWTSNotACIP + ? 
WYLIE_DISAMBIGUATING_KEY : '-', + fontSize); } } } else { @@ -1465,17 +1471,24 @@ public class TibTextUtils implements THDLWylieConstants { || (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) { if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (EWTSNotACIP) - translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + translitBuffer.append(wylie1 + + WYLIE_DISAMBIGUATING_KEY + + wylie2, + fontSize); else - translitBuffer.append(acip1 + '-' + acip2); + translitBuffer.append(acip1 + '-' + acip2, + fontSize); else if (EWTSNotACIP) - translitBuffer.append(wylie1 + wylie2); + translitBuffer.append(wylie1 + wylie2, + fontSize); else - translitBuffer.append(acip1 + acip2); + translitBuffer.append(acip1 + acip2, + fontSize); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2) - + (EWTSNotACIP ? wylie3 : acip3)); + + (EWTSNotACIP ? wylie3 : acip3), + fontSize); } else { if (EWTSNotACIP) translitBuffer.append(wylie1 @@ -1484,7 +1497,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie2, wylie3, acip2, - acip3)); + acip3), + fontSize); else translitBuffer.append(acip1 + aVowelToUseAfter(EWTSNotACIP, wylie1) @@ -1492,7 +1506,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie2, wylie3, acip2, - acip3)); + acip3), + fontSize); } } else if ("root" == candidateType || "prefix/root-root/suffix" == candidateType @@ -1502,12 +1517,13 @@ public class TibTextUtils implements THDLWylieConstants { String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP(); leftover = 1; - translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1); + translitBuffer.append((EWTSNotACIP) ? 
wylie1 : acip1, fontSize); if (((TGCPair)gcs.get(0)).classification != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(0)).classification); - translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1), + fontSize); if (debug) System.out.println("DEBUG: appending vowel"); } else { if (debug) System.out.println("DEBUG: already has vowel 2"); @@ -1522,7 +1538,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie2, wylie3, acip2, - acip3)); + acip3), + fontSize); } } else if ("prefix-root-suffix" == candidateType || "prefix-root" == candidateType @@ -1534,21 +1551,24 @@ public class TibTextUtils implements THDLWylieConstants { leftover = 2; if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (EWTSNotACIP) - translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2, + fontSize); else - translitBuffer.append(acip1 + '-' + acip2); + translitBuffer.append(acip1 + '-' + acip2, + fontSize); else if (EWTSNotACIP) - translitBuffer.append(wylie1 + wylie2); + translitBuffer.append(wylie1 + wylie2, fontSize); else - translitBuffer.append(acip1 + acip2); + translitBuffer.append(acip1 + acip2, fontSize); if (((TGCPair)gcs.get(1)).classification != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(1)).classification); if (debug) System.out.println("DEBUG: appending vowel"); - translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2), + fontSize); } else { if (debug) System.out.println("DEBUG: already has vowel 1"); } @@ -1562,7 +1582,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie3, wylie4, acip3, - acip4)); + acip4), + fontSize); } } else if ("number" == candidateType) { leftover = 0; @@ -1577,10 +1598,12 @@ public class 
TibTextUtils implements THDLWylieConstants { lastPairTranslit = (EWTSNotACIP ? tp.getWylie(null) : tp.getACIP(null)); - if (!translitBuffer.toString().endsWith(lastPairTranslit)) { + if ((translitBuffer.length() == 0) + || !translitBuffer.get(translitBuffer.length() - 1).getTranslit().endsWith(lastPairTranslit)) { int l; if ((l = translitBuffer.length()) > 0) { - char lc = translitBuffer.charAt(l - 1); + String s = translitBuffer.get(l - 1).getTranslit(); + char lc = s.charAt(s.length() - 1); ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */); lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */ } else { @@ -1594,7 +1617,8 @@ public class TibTextUtils implements THDLWylieConstants { String y; translitBuffer.append(EWTSNotACIP ? (y = tp.getWylie(lastPairTranslit)) - : (y = tp.getACIP(lastPairTranslit))); + : (y = tp.getACIP(lastPairTranslit)), + fontSize); if (appendaged) lastPairTranslit = y; } @@ -1619,23 +1643,23 @@ public class TibTextUtils implements THDLWylieConstants { * corresponding to these glyphs, then noSuch[0] will be set to true * @param warnings either null or a buffer to which will be appended * warnings about illegal tsheg bars -* @return the Extended Wylie/ACIP corresponding to these glyphs, or -* null */ - private static String getTranslitImplementation(boolean EWTSNotACIP, - DuffCode[] dcs, - boolean noSuch[], - StringBuffer warnings) { +* @return the Extended Wylie/ACIP corresponding to these glyphs (with +* font size info), or null */ + private static TranslitList getTranslitImplementation(boolean EWTSNotACIP, + SizedDuffCode[] dcs, + boolean noSuch[], + StringBuffer warnings) { if (dcs.length == 0) return null; ArrayList glyphList = new ArrayList(); - StringBuffer translitBuffer = new StringBuffer(); + TranslitList translitBuffer = new TranslitList(); // DLC FIXME: " " should become " " for ACIP - for (int i=0; i