I thought my earlier commit preserved font size info for TMW->ACIP/Wylie
conversions, but it did so only at a very coarse level.  The feature is now
truly here.
This commit is contained in:
dchandler 2004-06-20 02:57:28 +00:00
parent 8ccf57dccb
commit 14fb449f95
6 changed files with 383 additions and 142 deletions

View file

@ -36,17 +36,17 @@ import org.thdl.util.ThdlDebug;
* @author Edward Garrett, Tibetan and Himalayan Digital Library * @author Edward Garrett, Tibetan and Himalayan Digital Library
* @author David Chandler */ * @author David Chandler */
public final class DuffCode { public final /* immutable */ class DuffCode {
/** /**
* the font number in which this glyph can be found, from 1 * the font number in which this glyph can be found, from 1
* (TibetanMachineWeb/TibetanMachine) ... to 5 * (TibetanMachineWeb/TibetanMachine) ... to 5
* (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10 * (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10
* (TibetanMachineWeb9/[Invalid for TM family]). */ * (TibetanMachineWeb9/[Invalid for TM family]). */
private byte fontNum; private /* final if the compiler were smarter */ byte fontNum;
/** /**
* the character value of this glyph, as an integer (that is, ordinal) * the character value of this glyph, as an integer (that is, ordinal)
*/ */
private byte charNum; private /* final if the compiler were smarter */ byte charNum;
/** /**
* Called by {@link TibetanMachineWeb} to generate * Called by {@link TibetanMachineWeb} to generate

View file

@ -0,0 +1,38 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text;
/**
* An immutable representation of a Tibetan glyph of a certain size in
* the TibetanMachineWeb or TibetanMachine families of fonts.
*
* <p>A SizedDuffCode is a pair of a font size and a {@link
* DuffCode}.</p>
*
* @author David Chandler */
final /* immutable */ class SizedDuffCode {
    /** The glyph half of the pair. */
    private final DuffCode glyph;

    /** The font size half of the pair. */
    private final int size;

    /**
     * Pairs the glyph dc with the font size fontSize.
     *
     * @param dc the glyph; stored as given, no copy is made
     * @param fontSize the font size to associate with dc
     */
    public SizedDuffCode(DuffCode dc, int fontSize) {
        this.glyph = dc;
        this.size = fontSize;
    }

    /** Returns the glyph that was passed to the constructor. */
    public DuffCode getDuffCode() {
        return this.glyph;
    }

    /** Returns the font size that was passed to the constructor. */
    public int getFontSize() {
        return this.size;
    }
}

View file

@ -936,13 +936,13 @@ public class TibTextUtils implements THDLWylieConstants {
* @param noSuch an array which will not be touched if this is * @param noSuch an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie/ACIP * successful; however, if there is no THDL Extended Wylie/ACIP
* corresponding to these glyphs, then noSuch[0] will be set to true * corresponding to these glyphs, then noSuch[0] will be set to true
* @return the Extended Wylie/ACIP corresponding to these glyphs, or * @return the Extended Wylie/ACIP corresponding to these glyphs (with
* null */ * font size info), or null */
public static String getTranslit(boolean EWTSNotACIP, public static TranslitList getTranslit(boolean EWTSNotACIP,
DuffCode[] dcs, SizedDuffCode[] dcs,
boolean noSuch[]) { boolean noSuch[]) {
StringBuffer warnings = (debug ? new StringBuffer() : null); StringBuffer warnings = (debug ? new StringBuffer() : null);
String ans TranslitList ans
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings); = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
if (debug && warnings.length() > 0) if (debug && warnings.length() > 0)
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
@ -985,7 +985,7 @@ public class TibTextUtils implements THDLWylieConstants {
int pairType = TGCPair.TYPE_OTHER; int pairType = TGCPair.TYPE_OTHER;
for (int i = 0; i < sz; i++) { for (int i = 0; i < sz; i++) {
DuffCode dc = (DuffCode)glyphList.get(i); DuffCode dc = ((SizedDuffCode)glyphList.get(i)).getDuffCode();
String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie); String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
boolean buildingUpSanskritNext = false; boolean buildingUpSanskritNext = false;
if ((buildingUpSanskritNext if ((buildingUpSanskritNext
@ -1314,12 +1314,13 @@ public class TibTextUtils implements THDLWylieConstants {
} }
/** Appends to translitBuffer the EWTS/ACIP for the glyph list /** Appends to translitBuffer the EWTS/ACIP for the glyph list
glyphList (which should be an ArrayList for speed). This will glyphList (which should be an ArrayList for speed). The font
be very user-friendly for "legal tsheg bars" and will be size of the transliteration will be fontSize. The
valid, but possibly ugly (interspersed with disambiguators or transliteration will be very user-friendly for "legal tsheg
extra vowels, etc.) Wylie/ACIP for other things, such as bars" and will be valid, but possibly ugly (interspersed with
Sanskrit transliteration. Updates warnings and noSuch like disambiguators or extra vowels, etc.) Wylie/ACIP for other
the caller does. things, such as Sanskrit transliteration. Updates warnings
and noSuch like the caller does.
<p>What constitutes a legal, non-punctuation, non-whitespace <p>What constitutes a legal, non-punctuation, non-whitespace
tsheg bar? The following are the only such:</p> tsheg bar? The following are the only such:</p>
@ -1366,7 +1367,10 @@ public class TibTextUtils implements THDLWylieConstants {
java.util.List glyphList, java.util.List glyphList,
boolean noSuch[], boolean noSuch[],
StringBuffer warnings, StringBuffer warnings,
StringBuffer translitBuffer) { TranslitList translitBuffer) {
// FIXME: If font size changes within a tsheg-bar, we don't
// handle that.
int fontSize = ((SizedDuffCode)glyphList.get(0)).getFontSize();
TGCList gcs TGCList gcs
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch); = breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
String candidateType = getClassificationOfTshegBar(gcs, warnings, false); String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
@ -1397,16 +1401,18 @@ public class TibTextUtils implements THDLWylieConstants {
// and a.u and a.i, we always do it (see Rule 10 // and a.u and a.i, we always do it (see Rule 10
// of the September 1, 2003 draft of EWTS // of the September 1, 2003 draft of EWTS
// standard). // standard).
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY); translitBuffer.append(WYLIE_DISAMBIGUATING_KEY, fontSize);
} }
translitBuffer.append(translit); translitBuffer.append(translit, fontSize);
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie), fontSize);
} else if (i + 1 < sz) { } else if (i + 1 < sz) {
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
&& TGCPair.SANSKRIT_WITH_VOWEL != cls) && TGCPair.SANSKRIT_WITH_VOWEL != cls)
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-'); translitBuffer.append(EWTSNotACIP
? WYLIE_DISAMBIGUATING_KEY : '-',
fontSize);
} }
} }
} else { } else {
@ -1465,17 +1471,24 @@ public class TibTextUtils implements THDLWylieConstants {
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) { || (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
if (EWTSNotACIP) if (EWTSNotACIP)
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); translitBuffer.append(wylie1
+ WYLIE_DISAMBIGUATING_KEY
+ wylie2,
fontSize);
else else
translitBuffer.append(acip1 + '-' + acip2); translitBuffer.append(acip1 + '-' + acip2,
fontSize);
else else
if (EWTSNotACIP) if (EWTSNotACIP)
translitBuffer.append(wylie1 + wylie2); translitBuffer.append(wylie1 + wylie2,
fontSize);
else else
translitBuffer.append(acip1 + acip2); translitBuffer.append(acip1 + acip2,
fontSize);
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2) translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
+ (EWTSNotACIP ? wylie3 : acip3)); + (EWTSNotACIP ? wylie3 : acip3),
fontSize);
} else { } else {
if (EWTSNotACIP) if (EWTSNotACIP)
translitBuffer.append(wylie1 translitBuffer.append(wylie1
@ -1484,7 +1497,8 @@ public class TibTextUtils implements THDLWylieConstants {
wylie2, wylie2,
wylie3, wylie3,
acip2, acip2,
acip3)); acip3),
fontSize);
else else
translitBuffer.append(acip1 translitBuffer.append(acip1
+ aVowelToUseAfter(EWTSNotACIP, wylie1) + aVowelToUseAfter(EWTSNotACIP, wylie1)
@ -1492,7 +1506,8 @@ public class TibTextUtils implements THDLWylieConstants {
wylie2, wylie2,
wylie3, wylie3,
acip2, acip2,
acip3)); acip3),
fontSize);
} }
} else if ("root" == candidateType } else if ("root" == candidateType
|| "prefix/root-root/suffix" == candidateType || "prefix/root-root/suffix" == candidateType
@ -1502,12 +1517,13 @@ public class TibTextUtils implements THDLWylieConstants {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP(); String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
leftover = 1; leftover = 1;
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1); translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1, fontSize);
if (((TGCPair)gcs.get(0)).classification if (((TGCPair)gcs.get(0)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) { != TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(0)).classification); == ((TGCPair)gcs.get(0)).classification);
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1),
fontSize);
if (debug) System.out.println("DEBUG: appending vowel"); if (debug) System.out.println("DEBUG: appending vowel");
} else { } else {
if (debug) System.out.println("DEBUG: already has vowel 2"); if (debug) System.out.println("DEBUG: already has vowel 2");
@ -1522,7 +1538,8 @@ public class TibTextUtils implements THDLWylieConstants {
wylie2, wylie2,
wylie3, wylie3,
acip2, acip2,
acip3)); acip3),
fontSize);
} }
} else if ("prefix-root-suffix" == candidateType } else if ("prefix-root-suffix" == candidateType
|| "prefix-root" == candidateType || "prefix-root" == candidateType
@ -1534,21 +1551,24 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 2; leftover = 2;
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
if (EWTSNotACIP) if (EWTSNotACIP)
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2,
fontSize);
else else
translitBuffer.append(acip1 + '-' + acip2); translitBuffer.append(acip1 + '-' + acip2,
fontSize);
else else
if (EWTSNotACIP) if (EWTSNotACIP)
translitBuffer.append(wylie1 + wylie2); translitBuffer.append(wylie1 + wylie2, fontSize);
else else
translitBuffer.append(acip1 + acip2); translitBuffer.append(acip1 + acip2, fontSize);
if (((TGCPair)gcs.get(1)).classification if (((TGCPair)gcs.get(1)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) { != TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(1)).classification); == ((TGCPair)gcs.get(1)).classification);
if (debug) System.out.println("DEBUG: appending vowel"); if (debug) System.out.println("DEBUG: appending vowel");
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2),
fontSize);
} else { } else {
if (debug) System.out.println("DEBUG: already has vowel 1"); if (debug) System.out.println("DEBUG: already has vowel 1");
} }
@ -1562,7 +1582,8 @@ public class TibTextUtils implements THDLWylieConstants {
wylie3, wylie3,
wylie4, wylie4,
acip3, acip3,
acip4)); acip4),
fontSize);
} }
} else if ("number" == candidateType) { } else if ("number" == candidateType) {
leftover = 0; leftover = 0;
@ -1577,10 +1598,12 @@ public class TibTextUtils implements THDLWylieConstants {
lastPairTranslit = (EWTSNotACIP lastPairTranslit = (EWTSNotACIP
? tp.getWylie(null) ? tp.getWylie(null)
: tp.getACIP(null)); : tp.getACIP(null));
if (!translitBuffer.toString().endsWith(lastPairTranslit)) { if ((translitBuffer.length() == 0)
|| !translitBuffer.get(translitBuffer.length() - 1).getTranslit().endsWith(lastPairTranslit)) {
int l; int l;
if ((l = translitBuffer.length()) > 0) { if ((l = translitBuffer.length()) > 0) {
char lc = translitBuffer.charAt(l - 1); String s = translitBuffer.get(l - 1).getTranslit();
char lc = s.charAt(s.length() - 1);
ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */); ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */);
lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */ lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */
} else { } else {
@ -1594,7 +1617,8 @@ public class TibTextUtils implements THDLWylieConstants {
String y; String y;
translitBuffer.append(EWTSNotACIP translitBuffer.append(EWTSNotACIP
? (y = tp.getWylie(lastPairTranslit)) ? (y = tp.getWylie(lastPairTranslit))
: (y = tp.getACIP(lastPairTranslit))); : (y = tp.getACIP(lastPairTranslit)),
fontSize);
if (appendaged) if (appendaged)
lastPairTranslit = y; lastPairTranslit = y;
} }
@ -1619,23 +1643,23 @@ public class TibTextUtils implements THDLWylieConstants {
* corresponding to these glyphs, then noSuch[0] will be set to true * corresponding to these glyphs, then noSuch[0] will be set to true
* @param warnings either null or a buffer to which will be appended * @param warnings either null or a buffer to which will be appended
* warnings about illegal tsheg bars * warnings about illegal tsheg bars
* @return the Extended Wylie/ACIP corresponding to these glyphs, or * @return the Extended Wylie/ACIP corresponding to these glyphs (with
* null */ * font size info), or null */
private static String getTranslitImplementation(boolean EWTSNotACIP, private static TranslitList getTranslitImplementation(boolean EWTSNotACIP,
DuffCode[] dcs, SizedDuffCode[] dcs,
boolean noSuch[], boolean noSuch[],
StringBuffer warnings) { StringBuffer warnings) {
if (dcs.length == 0) if (dcs.length == 0)
return null; return null;
ArrayList glyphList = new ArrayList(); ArrayList glyphList = new ArrayList();
StringBuffer translitBuffer = new StringBuffer(); TranslitList translitBuffer = new TranslitList();
// DLC FIXME: " " should become " " for ACIP // DLC FIXME: " " should become " " for ACIP
for (int i=0; i<dcs.length; i++) { for (int i = 0; i < dcs.length; i++) {
char ch = dcs[i].getCharacter(); char ch = dcs[i].getDuffCode().getCharacter();
int k = dcs[i].getCharNum(); int k = dcs[i].getDuffCode().getCharNum();
// int fontNum = dcs[i].getFontNum(); int fsz = dcs[i].getFontSize();
if (k < 32) { if (k < 32) {
if (!glyphList.isEmpty()) { if (!glyphList.isEmpty()) {
@ -1649,14 +1673,17 @@ public class TibTextUtils implements THDLWylieConstants {
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds) // In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
// indicates a real line break. // indicates a real line break.
if (!EWTSNotACIP && '\n' == ch) { if (!EWTSNotACIP && '\n' == ch) {
if (i > 0 && dcs[i - 1].getCharacter() == '\r') if (i > 0
translitBuffer.append("\r\n"); && dcs[i - 1].getDuffCode().getCharacter() == '\r')
translitBuffer.append("\r\n", fsz);
else else
translitBuffer.append(ch); translitBuffer.append(ch, fsz);
} }
translitBuffer.append(ch); translitBuffer.append(ch, fsz);
} else { } else {
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch); String wylie
= TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(),
noSuch);
String acip = null; String acip = null;
if (!EWTSNotACIP) { if (!EWTSNotACIP) {
// U+0F04 and U+0F05 -- these require lookahead to // U+0F04 and U+0F05 -- these require lookahead to
@ -1665,12 +1692,12 @@ public class TibTextUtils implements THDLWylieConstants {
int howManyConsumed[] = new int[] { -1 /* invalid */ }; int howManyConsumed[] = new int[] { -1 /* invalid */ };
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i], acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(),
((i+1<dcs.length) ((i+1<dcs.length)
? dcs[i+1] ? dcs[i+1].getDuffCode()
: null), : null),
((i+2<dcs.length) ((i+2<dcs.length)
? dcs[i+2] ? dcs[i+2].getDuffCode()
: null), : null),
noSuch, noSuch,
howManyConsumed); howManyConsumed);
@ -1690,7 +1717,8 @@ public class TibTextUtils implements THDLWylieConstants {
warnings, translitBuffer); warnings, translitBuffer);
glyphList.clear(); glyphList.clear();
} }
translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation //append the punctuation:
translitBuffer.append(EWTSNotACIP ? wylie : acip, fsz);
} else { } else {
glyphList.add(dcs[i]); glyphList.add(dcs[i]);
} }
@ -1708,10 +1736,10 @@ public class TibTextUtils implements THDLWylieConstants {
} }
if (translitBuffer.length() > 0) { if (translitBuffer.length() > 0) {
return translitBuffer.toString(); return translitBuffer;
} } else {
else
return null; return null;
}
} }
/** Returns "root" instead of "appendaged-root", for example. */ /** Returns "root" instead of "appendaged-root", for example. */

View file

@ -382,69 +382,69 @@ public class TibetanDocument extends DefaultStyledDocument {
return getTranslit(false, begin, end, noSuchACIP); return getTranslit(false, begin, end, noSuchACIP);
} }
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) { private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
AttributeSet attr; AttributeSet attr;
String fontName; String fontName;
int fontNum; int fontNum;
DuffCode dc; char ch;
char ch;
if (begin >= end) if (begin >= end)
return ""; return "";
java.util.List dcs = new ArrayList(); java.util.List dcs = new ArrayList();
int i = begin; int i = begin;
StringBuffer translitBuffer = new StringBuffer(); TranslitList translitBuffer = new TranslitList();
try { try {
while (i < end) { while (i < end) {
attr = getCharacterElement(i).getAttributes(); attr = getCharacterElement(i).getAttributes();
fontName = StyleConstants.getFontFamily(attr); fontName = StyleConstants.getFontFamily(attr);
int fsz
= ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue();
ch = getText(i,1).charAt(0); ch = getText(i,1).charAt(0);
//current character is formatting //current character is formatting
if (ch == '\n' || ch == '\t') { if (ch == '\n' || ch == '\t') {
if (dcs.size() > 0) { if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0]; SizedDuffCode[] dc_array
dc_array = (DuffCode[])dcs.toArray(dc_array); = (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear(); dcs.clear();
} }
translitBuffer.append(ch); translitBuffer.append(ch, fsz);
} }
//current character isn't TMW
else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) {
if (dcs.size() > 0) {
SizedDuffCode[] dc_array
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear();
}
}
//current character is convertable
else {
dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch), fsz));
}
i++;
}
if (dcs.size() > 0) {
SizedDuffCode[] dc_array
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP,
dc_array,
noSuch));
}
return translitBuffer.getString();
}
catch (BadLocationException ble) {
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
//current character isn't TMW return "";
else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) { }
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear();
}
}
//current character is convertable
else {
dc = new DuffCode(fontNum, ch);
dcs.add(dc);
}
i++;
}
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
}
return translitBuffer.toString();
}
catch (BadLocationException ble) {
ble.printStackTrace();
ThdlDebug.noteIffyCode();
}
return "";
}
/** Prints to standard output a list of all the indices of /** Prints to standard output a list of all the indices of
characters that are not in a TMW font within the range [start, characters that are not in a TMW font within the range [start,
@ -1202,8 +1202,6 @@ public class TibetanDocument extends DefaultStyledDocument {
try { try {
boolean noSuchWylie[] = new boolean[] { false }; boolean noSuchWylie[] = new boolean[] { false };
DuffCode[] any_dc_array = new DuffCode[0];
DuffCode[] dc_array;
Position endPos = createPosition(end); Position endPos = createPosition(end);
int i = start; int i = start;
java.util.List dcs = new ArrayList(); java.util.List dcs = new ArrayList();
@ -1213,39 +1211,46 @@ public class TibetanDocument extends DefaultStyledDocument {
= getCharacterElement(i).getAttributes(); = getCharacterElement(i).getAttributes();
String fontName = StyleConstants.getFontFamily(attr); String fontName = StyleConstants.getFontFamily(attr);
int fontNum; int fontNum;
int iFontSize = 72; /* the failure ought to be obvious
at this size */
try {
iFontSize
= ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue();
} catch (Exception e) {
// leave it as 72
}
if ((0 == (fontNum if ((0 == (fontNum
= TibetanMachineWeb.getTMWFontNumber(fontName))) = TibetanMachineWeb.getTMWFontNumber(fontName)))
|| i==endPos.getOffset()) { || i==endPos.getOffset()) {
if (i != start) { if (i != start) {
dc_array = (DuffCode[])dcs.toArray(any_dc_array); SizedDuffCode[] sdc_array
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
/* Low-priority FIXME: If the font size
changes within a tsheg bar, the roman
output will not mimic such changes. */
// SPEED_FIXME: determining font size might be slow
int fontSize = 72; /* the failure ought to be
obvious at this size */
try {
fontSize = ((Integer)getCharacterElement(start).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
} catch (Exception e) {
// leave it as 72
}
remove(start, i-start); remove(start, i-start);
ThdlDebug.verify(getRomanAttributeSet() != null); ThdlDebug.verify(getRomanAttributeSet() != null);
insertString(start, TranslitList tb
TibTextUtils.getTranslit(EWTSNotACIP, = TibTextUtils.getTranslit(EWTSNotACIP,
dc_array, sdc_array,
noSuchWylie), noSuchWylie);
getCopyOfRomanAttributeSet(fontSize)); int lastFontSize = -1;
for (int j = 0; j < tb.length(); j++) {
TranslitTuple tt = tb.get(j);
int thisFontSize;
insertString(start,
tt.getTranslit(),
getCopyOfRomanAttributeSet(thisFontSize = tt.getFontSize()));
if (thisFontSize == lastFontSize)
throw new Error("FIXME: make this an assertion");
lastFontSize = thisFontSize;
}
dcs.clear(); dcs.clear();
} }
start = i+1; start = i+1;
} else { } else {
char ch = getText(i,1).charAt(0); char ch = getText(i,1).charAt(0);
dcs.add(new DuffCode(fontNum, ch)); dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch),
iFontSize));
++numAttemptedReplacements[0]; ++numAttemptedReplacements[0];
} }

View file

@ -0,0 +1,109 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text;
import java.util.Vector;
/**
* A mutable representation of Roman transliteration with font size
* information for each character of transliteration.
*
* @author David Chandler */
class TranslitList {
    /** Invariant: For all 0<=i<length()-1,
        ((TranslitTuple)vec.get(i)).getFontSize() !=
        ((TranslitTuple)vec.get(i+1)).getFontSize().  That is,
        adjacent stretches never share a font size. */
    private final Vector /* of TranslitTuple */ vec;

    /** Creates an empty list. */
    public TranslitList() {
        this.vec = new Vector();
    }

    /** Returns the number of TranslitTuples in this list. */
    public int length() {
        return vec.size();
    }

    /** Returns the i-th TranslitTuple in this list.
        @param i a zero-based index less than length()
        @throws ArrayIndexOutOfBoundsException if i is out of range */
    public TranslitTuple get(int i) throws ArrayIndexOutOfBoundsException {
        return (TranslitTuple)vec.get(i);
    }

    /** Appends to the end of this list a single character of Roman
        transliteration with the given font size.  The last element of
        this list will have ch appended to it if font sizes are the
        same; otherwise this list grows by an element. */
    public void append(char ch, int fontSize) {
        append(String.valueOf(ch), fontSize);
    }

    /** Appends to the end of this list a stretch s of Roman
        transliteration that has font size fontSize.  The last element
        of this list will have s appended to it if font sizes are the
        same; otherwise this list grows by an element. */
    public void append(String s, int fontSize) {
        if (vec.isEmpty()) {
            vec.add(new TranslitTuple(s, fontSize));
        } else {
            TranslitTuple tt = (TranslitTuple)vec.lastElement();
            // getPossiblyCombinedTranslitTuple returns tt itself
            // (after appending s to it) when the font sizes match and
            // a brand new tuple otherwise.
            TranslitTuple newtt
                = tt.getPossiblyCombinedTranslitTuple(s, fontSize);
            if (tt != newtt)
                vec.add(newtt);
        }
    }

    /** Appends to the end of this list another TranslitList.  The
        length of this list may or may not increase; the first element
        of tb and the last element of this list will be merged if
        their font sizes are the same.  The contents of tb are copied,
        so tb is left unmodified and later changes to this list do not
        affect tb.
        @param tb the list to append; null is treated as an empty list
        @throws IllegalArgumentException if tb is this list */
    public void append(TranslitList tb) {
        if (this == tb)
            throw new IllegalArgumentException("Cannot be this list, that would be bad!");
        if (tb == null)
            return; // nothing to append
        // Route every stretch through append(String, int).  That
        // method compares against the current last element of THIS
        // list, merging when font sizes match and otherwise adding a
        // fresh tuple, so the invariant is maintained and no mutable
        // TranslitTuple ends up shared between the two lists.
        for (int i = 0; i < tb.length(); i++) {
            TranslitTuple tt = tb.get(i);
            append(tt.getTranslit(), tt.getFontSize());
        }
    }

    /** Do not call this -- it throws an error. */
    public String toString() {
        throw new Error("There was a bug where this was called, so don't call this.");
    }

    /** Returns the full Roman transliteration.  You don't get font
        size information this way, of course. */
    public String getString() {
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < length(); i++) {
            sb.append(get(i).getTranslit());
        }
        return sb.toString();
    }
}

View file

@ -0,0 +1,61 @@
/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).
Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.
The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
All Rights Reserved.
Contributor(s): ______________________________________.
*/
package org.thdl.tib.text;
import java.util.Vector;
/**
* A stretch of Roman transliteration all in a certain font size.
*
* @author David Chandler */
class TranslitTuple {
    /** Accumulates the transliteration text of this stretch. */
    private final StringBuffer text;

    /** Font size shared by all of the text in this stretch. */
    private final int fontSize;

    /** Creates a TranslitTuple representing the transliteration s,
        which has font size sz. */
    public TranslitTuple(String s, int sz) {
        // FIXME: assert(s.length() > 0);
        this.fontSize = sz;
        this.text = new StringBuffer(s);
    }

    /** Returns a tuple that accounts for s, whose font size is sz.
        If sz differs from this tuple's font size, a new tuple
        holding just s is returned and this tuple is untouched.
        Otherwise s is appended to this tuple in place and this very
        tuple is returned. */
    public TranslitTuple getPossiblyCombinedTranslitTuple(String s, int sz) {
        if (sz != this.fontSize) {
            return new TranslitTuple(s, sz);
        }
        this.text.append(s);
        return this;
    }

    /** Returns the stretch of Roman transliteration. */
    public String getTranslit() {
        return this.text.toString();
    }

    /** Returns the font size of the Roman transliteration. */
    public int getFontSize() {
        return this.fontSize;
    }

    /** Do not call this -- it throws an error. */
    public String toString() {
        throw new Error("There was a bug where this was called, so don't call this.");
    }
}