diff --git a/source/org/thdl/tib/text/DuffCode.java b/source/org/thdl/tib/text/DuffCode.java index dc04e92..4159aa6 100644 --- a/source/org/thdl/tib/text/DuffCode.java +++ b/source/org/thdl/tib/text/DuffCode.java @@ -36,17 +36,17 @@ import org.thdl.util.ThdlDebug; * @author Edward Garrett, Tibetan and Himalayan Digital Library * @author David Chandler */ -public final class DuffCode { +public final /* immutable */ class DuffCode { /** * the font number in which this glyph can be found, from 1 * (TibetanMachineWeb/TibetanMachine) ... to 5 * (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10 * (TibetanMachineWeb9/[Invalid for TM family]). */ - private byte fontNum; + private /* final if the compiler were smarter */ byte fontNum; /** * the character value of this glyph, as an integer (that is, ordinal) */ - private byte charNum; + private /* final if the compiler were smarter */ byte charNum; /** * Called by {@link TibetanMachineWeb} to generate diff --git a/source/org/thdl/tib/text/SizedDuffCode.java b/source/org/thdl/tib/text/SizedDuffCode.java new file mode 100644 index 0000000..0b610e6 --- /dev/null +++ b/source/org/thdl/tib/text/SizedDuffCode.java @@ -0,0 +1,38 @@ +/* +The contents of this file are subject to the THDL Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the THDL web site +(http://www.thdl.org/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is the Tibetan and Himalayan Digital +Library (THDL). Portions created by the THDL are Copyright 2004 THDL. +All Rights Reserved. + +Contributor(s): ______________________________________. +*/ + +package org.thdl.tib.text; + +/** +* An immutable representation of a Tibetan glyph of a certain size in +* the TibetanMachineWeb or TibetanMachine families of fonts. +* +*

A SizedDuffCode is a pair of a font size and a {@link +* DuffCode}.

+* +* @author David Chandler */ +final /* immutable */ class SizedDuffCode { + private final DuffCode dc; + private final int fontSize; + public SizedDuffCode(DuffCode dc, int fontSize) { + this.dc = dc; + this.fontSize = fontSize; + } + public DuffCode getDuffCode() { return dc; } + public int getFontSize() { return fontSize; } +} diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index f0cee0e..6429990 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -936,13 +936,13 @@ public class TibTextUtils implements THDLWylieConstants { * @param noSuch an array which will not be touched if this is * successful; however, if there is no THDL Extended Wylie/ACIP * corresponding to these glyphs, then noSuch[0] will be set to true -* @return the Extended Wylie/ACIP corresponding to these glyphs, or -* null */ - public static String getTranslit(boolean EWTSNotACIP, - DuffCode[] dcs, - boolean noSuch[]) { +* @return the Extended Wylie/ACIP corresponding to these glyphs (with +* font size info), or null */ + public static TranslitList getTranslit(boolean EWTSNotACIP, + SizedDuffCode[] dcs, + boolean noSuch[]) { StringBuffer warnings = (debug ? new StringBuffer() : null); - String ans + TranslitList ans = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings); if (debug && warnings.length() > 0) System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); @@ -985,7 +985,7 @@ public class TibTextUtils implements THDLWylieConstants { int pairType = TGCPair.TYPE_OTHER; for (int i = 0; i < sz; i++) { - DuffCode dc = (DuffCode)glyphList.get(i); + DuffCode dc = ((SizedDuffCode)glyphList.get(i)).getDuffCode(); String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie); boolean buildingUpSanskritNext = false; if ((buildingUpSanskritNext @@ -1314,12 +1314,13 @@ public class TibTextUtils implements THDLWylieConstants { } /** Appends to translitBuffer the EWTS/ACIP for the glyph list - glyphList (which should be an ArrayList for speed). This will - be very user-friendly for "legal tsheg bars" and will be - valid, but possibly ugly (interspersed with disambiguators or - extra vowels, etc.) Wylie/ACIP for other things, such as - Sanskrit transliteration. Updates warnings and noSuch like - the caller does. + glyphList (which should be an ArrayList for speed). The font + size of the transliteration will be fontSize. The + transliteration will be very user-friendly for "legal tsheg + bars" and will be valid, but possibly ugly (interspersed with + disambiguators or extra vowels, etc.) Wylie/ACIP for other + things, such as Sanskrit transliteration. Updates warnings + and noSuch like the caller does.

What constitutes a legal, non-punctuation, non-whitespace tsheg bar? The following are the only such:

@@ -1366,7 +1367,10 @@ public class TibTextUtils implements THDLWylieConstants { java.util.List glyphList, boolean noSuch[], StringBuffer warnings, - StringBuffer translitBuffer) { + TranslitList translitBuffer) { + // FIXME: If font size changes within a tsheg-bar, we don't + // handle that. + int fontSize = ((SizedDuffCode)glyphList.get(0)).getFontSize(); TGCList gcs = breakTshegBarIntoGraphemeClusters(glyphList, noSuch); String candidateType = getClassificationOfTshegBar(gcs, warnings, false); @@ -1397,16 +1401,18 @@ public class TibTextUtils implements THDLWylieConstants { // and a.u and a.i, we always do it (see Rule 10 // of the September 1, 2003 draft of EWTS // standard). - translitBuffer.append(WYLIE_DISAMBIGUATING_KEY); + translitBuffer.append(WYLIE_DISAMBIGUATING_KEY, fontSize); } - translitBuffer.append(translit); + translitBuffer.append(translit, fontSize); if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { - translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie), fontSize); } else if (i + 1 < sz) { if (TGCPair.CONSONANTAL_WITH_VOWEL != cls && TGCPair.SANSKRIT_WITH_VOWEL != cls) - translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-'); + translitBuffer.append(EWTSNotACIP + ? WYLIE_DISAMBIGUATING_KEY : '-', + fontSize); } } } else { @@ -1465,17 +1471,24 @@ public class TibTextUtils implements THDLWylieConstants { || (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) { if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (EWTSNotACIP) - translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + translitBuffer.append(wylie1 + + WYLIE_DISAMBIGUATING_KEY + + wylie2, + fontSize); else - translitBuffer.append(acip1 + '-' + acip2); + translitBuffer.append(acip1 + '-' + acip2, + fontSize); else if (EWTSNotACIP) - translitBuffer.append(wylie1 + wylie2); + translitBuffer.append(wylie1 + wylie2, + fontSize); else - translitBuffer.append(acip1 + acip2); + translitBuffer.append(acip1 + acip2, + fontSize); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2) - + (EWTSNotACIP ? wylie3 : acip3)); + + (EWTSNotACIP ? wylie3 : acip3), + fontSize); } else { if (EWTSNotACIP) translitBuffer.append(wylie1 @@ -1484,7 +1497,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie2, wylie3, acip2, - acip3)); + acip3), + fontSize); else translitBuffer.append(acip1 + aVowelToUseAfter(EWTSNotACIP, wylie1) @@ -1492,7 +1506,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie2, wylie3, acip2, - acip3)); + acip3), + fontSize); } } else if ("root" == candidateType || "prefix/root-root/suffix" == candidateType @@ -1502,12 +1517,13 @@ public class TibTextUtils implements THDLWylieConstants { String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP(); leftover = 1; - translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1); + translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1, fontSize); if (((TGCPair)gcs.get(0)).classification != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(0)).classification); - translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1), + fontSize); if (debug) System.out.println("DEBUG: appending vowel"); } else { if (debug) System.out.println("DEBUG: already has vowel 2"); @@ -1522,7 +1538,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie2, wylie3, acip2, - acip3)); + acip3), + fontSize); } } else if ("prefix-root-suffix" == candidateType || "prefix-root" == candidateType @@ -1534,21 +1551,24 @@ public class TibTextUtils implements THDLWylieConstants { leftover = 2; if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (EWTSNotACIP) - translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2, + fontSize); else - translitBuffer.append(acip1 + '-' + acip2); + translitBuffer.append(acip1 + '-' + acip2, + fontSize); else if (EWTSNotACIP) - translitBuffer.append(wylie1 + wylie2); + translitBuffer.append(wylie1 + wylie2, fontSize); else - translitBuffer.append(acip1 + acip2); + translitBuffer.append(acip1 + acip2, fontSize); if (((TGCPair)gcs.get(1)).classification != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(1)).classification); if (debug) System.out.println("DEBUG: appending vowel"); - translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2), + fontSize); } else { if (debug) System.out.println("DEBUG: already has vowel 1"); } @@ -1562,7 +1582,8 @@ public class TibTextUtils implements THDLWylieConstants { wylie3, wylie4, acip3, - acip4)); + acip4), + fontSize); } } else if ("number" == candidateType) { leftover = 0; @@ -1577,10 +1598,12 @@ public class TibTextUtils implements THDLWylieConstants { lastPairTranslit = (EWTSNotACIP ? tp.getWylie(null) : tp.getACIP(null)); - if (!translitBuffer.toString().endsWith(lastPairTranslit)) { + if ((translitBuffer.length() == 0) + || !translitBuffer.get(translitBuffer.length() - 1).getTranslit().endsWith(lastPairTranslit)) { int l; if ((l = translitBuffer.length()) > 0) { - char lc = translitBuffer.charAt(l - 1); + String s = translitBuffer.get(l - 1).getTranslit(); + char lc = s.charAt(s.length() - 1); ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */); lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */ } else { @@ -1594,7 +1617,8 @@ public class TibTextUtils implements THDLWylieConstants { String y; translitBuffer.append(EWTSNotACIP ? (y = tp.getWylie(lastPairTranslit)) - : (y = tp.getACIP(lastPairTranslit))); + : (y = tp.getACIP(lastPairTranslit)), + fontSize); if (appendaged) lastPairTranslit = y; } @@ -1619,23 +1643,23 @@ public class TibTextUtils implements THDLWylieConstants { * corresponding to these glyphs, then noSuch[0] will be set to true * @param warnings either null or a buffer to which will be appended * warnings about illegal tsheg bars -* @return the Extended Wylie/ACIP corresponding to these glyphs, or -* null */ - private static String getTranslitImplementation(boolean EWTSNotACIP, - DuffCode[] dcs, - boolean noSuch[], - StringBuffer warnings) { +* @return the Extended Wylie/ACIP corresponding to these glyphs (with +* font size info), or null */ + private static TranslitList getTranslitImplementation(boolean EWTSNotACIP, + SizedDuffCode[] dcs, + boolean noSuch[], + StringBuffer warnings) { if (dcs.length == 0) return null; ArrayList glyphList = new ArrayList(); - StringBuffer translitBuffer = new StringBuffer(); + TranslitList translitBuffer = new TranslitList(); // DLC FIXME: " " should become " " for ACIP - for (int i=0; i 0 && dcs[i - 1].getCharacter() == '\r') - translitBuffer.append("\r\n"); + if (i > 0 + && dcs[i - 1].getDuffCode().getCharacter() == '\r') + translitBuffer.append("\r\n", fsz); else - translitBuffer.append(ch); + translitBuffer.append(ch, fsz); } - translitBuffer.append(ch); + translitBuffer.append(ch, fsz); } else { - String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch); + String wylie + = TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(), + noSuch); String acip = null; if (!EWTSNotACIP) { // U+0F04 and U+0F05 -- these require lookahead to @@ -1665,12 +1692,12 @@ public class TibTextUtils implements THDLWylieConstants { int howManyConsumed[] = new int[] { -1 /* invalid */ }; - acip = TibetanMachineWeb.getACIPForGlyph(dcs[i], + acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(), ((i+1 0) { - return translitBuffer.toString(); - } - else + return translitBuffer; + } else { return null; + } } /** Returns "root" instead of "appendaged-root", for example. */ diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index 0846679..238b569 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -382,69 +382,69 @@ public class TibetanDocument extends DefaultStyledDocument { return getTranslit(false, begin, end, noSuchACIP); } - private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) { - AttributeSet attr; - String fontName; - int fontNum; - DuffCode dc; - char ch; + private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) { + AttributeSet attr; + String fontName; + int fontNum; + char ch; - if (begin >= end) - return ""; + if (begin >= end) + return ""; - java.util.List dcs = new ArrayList(); - int i = begin; - StringBuffer translitBuffer = new StringBuffer(); + java.util.List dcs = new ArrayList(); + int i = begin; + TranslitList translitBuffer = new TranslitList(); - try { - while (i < end) { - attr = getCharacterElement(i).getAttributes(); - fontName = StyleConstants.getFontFamily(attr); + try { + while (i < end) { + attr = getCharacterElement(i).getAttributes(); + fontName = StyleConstants.getFontFamily(attr); + int fsz + = ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue(); - ch = getText(i,1).charAt(0); + ch = getText(i,1).charAt(0); - //current character is formatting - if (ch == '\n' || ch == '\t') { - if (dcs.size() > 0) { - DuffCode[] dc_array = new DuffCode[0]; - dc_array = (DuffCode[])dcs.toArray(dc_array); - translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); - dcs.clear(); - } - translitBuffer.append(ch); - } + //current character is formatting + if (ch == '\n' || ch == '\t') { + if (dcs.size() > 0) { + SizedDuffCode[] dc_array + = (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]); + translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); + dcs.clear(); + } + translitBuffer.append(ch, fsz); + } + //current character isn't TMW + else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) { + if (dcs.size() > 0) { + SizedDuffCode[] dc_array + = (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]); + translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); + dcs.clear(); + } + } + //current character is convertable + else { + dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch), fsz)); + } + i++; + } + if (dcs.size() > 0) { + SizedDuffCode[] dc_array + = (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]); + translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, + dc_array, + noSuch)); + } + return translitBuffer.getString(); + } + catch (BadLocationException ble) { + ble.printStackTrace(); + ThdlDebug.noteIffyCode(); + } - //current character isn't TMW - else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) { - if (dcs.size() > 0) { - DuffCode[] dc_array = new DuffCode[0]; - dc_array = (DuffCode[])dcs.toArray(dc_array); - translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); - dcs.clear(); - } - } - - //current character is convertable - else { - dc = new DuffCode(fontNum, ch); - dcs.add(dc); - } - i++; - } - if (dcs.size() > 0) { - DuffCode[] dc_array = new DuffCode[0]; - dc_array = (DuffCode[])dcs.toArray(dc_array); - translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); - } - return translitBuffer.toString(); - } - catch (BadLocationException ble) { - ble.printStackTrace(); - ThdlDebug.noteIffyCode(); - } - - return ""; - } + return ""; + } /** Prints to standard output a list of all the indices of characters that are not in a TMW font within the range [start, @@ -1202,8 +1202,6 @@ public class TibetanDocument extends DefaultStyledDocument { try { boolean noSuchWylie[] = new boolean[] { false }; - DuffCode[] any_dc_array = new DuffCode[0]; - DuffCode[] dc_array; Position endPos = createPosition(end); int i = start; java.util.List dcs = new ArrayList(); @@ -1213,39 +1211,46 @@ public class TibetanDocument extends DefaultStyledDocument { = getCharacterElement(i).getAttributes(); String fontName = StyleConstants.getFontFamily(attr); int fontNum; + int iFontSize = 72; /* the failure ought to be obvious + at this size */ + try { + iFontSize + = ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue(); + } catch (Exception e) { + // leave it as 72 + } if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName))) || i==endPos.getOffset()) { if (i != start) { - dc_array = (DuffCode[])dcs.toArray(any_dc_array); - - /* Low-priority FIXME: If the font size - changes within a tsheg bar, the roman - output will not mimic such changes. */ - - // SPEED_FIXME: determining font size might be slow - int fontSize = 72; /* the failure ought to be - obvious at this size */ - try { - fontSize = ((Integer)getCharacterElement(start).getAttributes().getAttribute(StyleConstants.FontSize)).intValue(); - } catch (Exception e) { - // leave it as 72 - } + SizedDuffCode[] sdc_array + = (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]); remove(start, i-start); ThdlDebug.verify(getRomanAttributeSet() != null); - insertString(start, - TibTextUtils.getTranslit(EWTSNotACIP, - dc_array, - noSuchWylie), - getCopyOfRomanAttributeSet(fontSize)); + TranslitList tb + = TibTextUtils.getTranslit(EWTSNotACIP, + sdc_array, + noSuchWylie); + int lastFontSize = -1; + for (int j = 0; j < tb.length(); j++) { + TranslitTuple tt = tb.get(j); + int thisFontSize; + insertString(start, + tt.getTranslit(), + getCopyOfRomanAttributeSet(thisFontSize = tt.getFontSize())); + if (thisFontSize == lastFontSize) + throw new Error("FIXME: make this an assertion"); + lastFontSize = thisFontSize; + } dcs.clear(); } start = i+1; } else { char ch = getText(i,1).charAt(0); - dcs.add(new DuffCode(fontNum, ch)); + dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch), + iFontSize)); ++numAttemptedReplacements[0]; } diff --git a/source/org/thdl/tib/text/TranslitList.java b/source/org/thdl/tib/text/TranslitList.java new file mode 100644 index 0000000..9f3da2a --- /dev/null +++ b/source/org/thdl/tib/text/TranslitList.java @@ -0,0 +1,109 @@ +/* +The contents of this file are subject to the THDL Open Community License +Version 1.0 (the "License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License on the THDL web site +(http://www.thdl.org/). + +Software distributed under the License is distributed on an "AS IS" basis, +WITHOUT WARRANTY OF ANY KIND, either express or implied. See the +License for the specific terms governing rights and limitations under the +License. + +The Initial Developer of this software is the Tibetan and Himalayan Digital +Library (THDL). Portions created by the THDL are Copyright 2004 THDL. +All Rights Reserved. + +Contributor(s): ______________________________________. +*/ + +package org.thdl.tib.text; + +import java.util.Vector; + +/** +* A mutable representation of Roman transliteration with font size +* information for each character of transliteration. +* +* @author David Chandler */ +class TranslitList { + /** Invariant: For all 0<=i 0); + } + + /** Appends the transliteration s to this tuple and returns this + tuple if sz, the font size for s, is the same as this tuple's + font size. Returns a new tuple for s otherwise. */ + public TranslitTuple getPossiblyCombinedTranslitTuple(String s, int sz) { + if (this.sz == sz) { + sb.append(s); + return this; + } else { + return new TranslitTuple(s, sz); + } + } + + /** Returns the stretch of Roman transliteration. */ + public String getTranslit() { return sb.toString(); } + + /** Returns the font size of the Roman transliteration. */ + public int getFontSize() { return sz; } + + /** Do not call this -- it throws an error. */ + public String toString() { + throw new Error("There was a bug where this was called, so don't call this."); + } +}