I thought my earlier commit preserved font size info for TMW->ACIP/Wylie
conversions. It was only at a very coarse level. The feature is now truly here.
This commit is contained in:
parent
8ccf57dccb
commit
14fb449f95
6 changed files with 383 additions and 142 deletions
|
@ -36,17 +36,17 @@ import org.thdl.util.ThdlDebug;
|
|||
* @author Edward Garrett, Tibetan and Himalayan Digital Library
|
||||
* @author David Chandler */
|
||||
|
||||
public final class DuffCode {
|
||||
public final /* immutable */ class DuffCode {
|
||||
/**
|
||||
* the font number in which this glyph can be found, from 1
|
||||
* (TibetanMachineWeb/TibetanMachine) ... to 5
|
||||
* (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10
|
||||
* (TibetanMachineWeb9/[Invalid for TM family]). */
|
||||
private byte fontNum;
|
||||
private /* final if the compiler were smarter */ byte fontNum;
|
||||
/**
|
||||
* the character value of this glyph, as an integer (that is, ordinal)
|
||||
*/
|
||||
private byte charNum;
|
||||
private /* final if the compiler were smarter */ byte charNum;
|
||||
|
||||
/**
|
||||
* Called by {@link TibetanMachineWeb} to generate
|
||||
|
|
38
source/org/thdl/tib/text/SizedDuffCode.java
Normal file
38
source/org/thdl/tib/text/SizedDuffCode.java
Normal file
|
@ -0,0 +1,38 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text;
|
||||
|
||||
/**
|
||||
* An immutable representation of a Tibetan glyph of a certain size in
|
||||
* the TibetanMachineWeb or TibetanMachine families of fonts.
|
||||
*
|
||||
* <p>A SizedDuffCode is a pair of a font size and a {@link
|
||||
* DuffCode}.</p>
|
||||
*
|
||||
* @author David Chandler */
|
||||
final /* immutable */ class SizedDuffCode {
|
||||
private final DuffCode dc;
|
||||
private final int fontSize;
|
||||
public SizedDuffCode(DuffCode dc, int fontSize) {
|
||||
this.dc = dc;
|
||||
this.fontSize = fontSize;
|
||||
}
|
||||
public DuffCode getDuffCode() { return dc; }
|
||||
public int getFontSize() { return fontSize; }
|
||||
}
|
|
@ -936,13 +936,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* @param noSuch an array which will not be touched if this is
|
||||
* successful; however, if there is no THDL Extended Wylie/ACIP
|
||||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
|
||||
* null */
|
||||
public static String getTranslit(boolean EWTSNotACIP,
|
||||
DuffCode[] dcs,
|
||||
boolean noSuch[]) {
|
||||
* @return the Extended Wylie/ACIP corresponding to these glyphs (with
|
||||
* font size info), or null */
|
||||
public static TranslitList getTranslit(boolean EWTSNotACIP,
|
||||
SizedDuffCode[] dcs,
|
||||
boolean noSuch[]) {
|
||||
StringBuffer warnings = (debug ? new StringBuffer() : null);
|
||||
String ans
|
||||
TranslitList ans
|
||||
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
|
||||
if (debug && warnings.length() > 0)
|
||||
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
|
||||
|
@ -985,7 +985,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
int pairType = TGCPair.TYPE_OTHER;
|
||||
|
||||
for (int i = 0; i < sz; i++) {
|
||||
DuffCode dc = (DuffCode)glyphList.get(i);
|
||||
DuffCode dc = ((SizedDuffCode)glyphList.get(i)).getDuffCode();
|
||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
|
||||
boolean buildingUpSanskritNext = false;
|
||||
if ((buildingUpSanskritNext
|
||||
|
@ -1314,12 +1314,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
}
|
||||
|
||||
/** Appends to translitBuffer the EWTS/ACIP for the glyph list
|
||||
glyphList (which should be an ArrayList for speed). This will
|
||||
be very user-friendly for "legal tsheg bars" and will be
|
||||
valid, but possibly ugly (interspersed with disambiguators or
|
||||
extra vowels, etc.) Wylie/ACIP for other things, such as
|
||||
Sanskrit transliteration. Updates warnings and noSuch like
|
||||
the caller does.
|
||||
glyphList (which should be an ArrayList for speed). The font
|
||||
size of the transliteration will be fontSize. The
|
||||
transliteration will be very user-friendly for "legal tsheg
|
||||
bars" and will be valid, but possibly ugly (interspersed with
|
||||
disambiguators or extra vowels, etc.) Wylie/ACIP for other
|
||||
things, such as Sanskrit transliteration. Updates warnings
|
||||
and noSuch like the caller does.
|
||||
|
||||
<p>What constitutes a legal, non-punctuation, non-whitespace
|
||||
tsheg bar? The following are the only such:</p>
|
||||
|
@ -1366,7 +1367,10 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
java.util.List glyphList,
|
||||
boolean noSuch[],
|
||||
StringBuffer warnings,
|
||||
StringBuffer translitBuffer) {
|
||||
TranslitList translitBuffer) {
|
||||
// FIXME: If font size changes within a tsheg-bar, we don't
|
||||
// handle that.
|
||||
int fontSize = ((SizedDuffCode)glyphList.get(0)).getFontSize();
|
||||
TGCList gcs
|
||||
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
|
||||
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
|
||||
|
@ -1397,16 +1401,18 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
// and a.u and a.i, we always do it (see Rule 10
|
||||
// of the September 1, 2003 draft of EWTS
|
||||
// standard).
|
||||
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY);
|
||||
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY, fontSize);
|
||||
}
|
||||
translitBuffer.append(translit);
|
||||
translitBuffer.append(translit, fontSize);
|
||||
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|
||||
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie), fontSize);
|
||||
} else if (i + 1 < sz) {
|
||||
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
|
||||
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
|
||||
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
|
||||
translitBuffer.append(EWTSNotACIP
|
||||
? WYLIE_DISAMBIGUATING_KEY : '-',
|
||||
fontSize);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
@ -1465,17 +1471,24 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
|
||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
||||
translitBuffer.append(wylie1
|
||||
+ WYLIE_DISAMBIGUATING_KEY
|
||||
+ wylie2,
|
||||
fontSize);
|
||||
else
|
||||
translitBuffer.append(acip1 + '-' + acip2);
|
||||
translitBuffer.append(acip1 + '-' + acip2,
|
||||
fontSize);
|
||||
else
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + wylie2);
|
||||
translitBuffer.append(wylie1 + wylie2,
|
||||
fontSize);
|
||||
else
|
||||
translitBuffer.append(acip1 + acip2);
|
||||
translitBuffer.append(acip1 + acip2,
|
||||
fontSize);
|
||||
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
|
||||
+ (EWTSNotACIP ? wylie3 : acip3));
|
||||
+ (EWTSNotACIP ? wylie3 : acip3),
|
||||
fontSize);
|
||||
} else {
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1
|
||||
|
@ -1484,7 +1497,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
wylie2,
|
||||
wylie3,
|
||||
acip2,
|
||||
acip3));
|
||||
acip3),
|
||||
fontSize);
|
||||
else
|
||||
translitBuffer.append(acip1
|
||||
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
|
||||
|
@ -1492,7 +1506,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
wylie2,
|
||||
wylie3,
|
||||
acip2,
|
||||
acip3));
|
||||
acip3),
|
||||
fontSize);
|
||||
}
|
||||
} else if ("root" == candidateType
|
||||
|| "prefix/root-root/suffix" == candidateType
|
||||
|
@ -1502,12 +1517,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
||||
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
|
||||
leftover = 1;
|
||||
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
|
||||
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1, fontSize);
|
||||
if (((TGCPair)gcs.get(0)).classification
|
||||
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
||||
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||
== ((TGCPair)gcs.get(0)).classification);
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1),
|
||||
fontSize);
|
||||
if (debug) System.out.println("DEBUG: appending vowel");
|
||||
} else {
|
||||
if (debug) System.out.println("DEBUG: already has vowel 2");
|
||||
|
@ -1522,7 +1538,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
wylie2,
|
||||
wylie3,
|
||||
acip2,
|
||||
acip3));
|
||||
acip3),
|
||||
fontSize);
|
||||
}
|
||||
} else if ("prefix-root-suffix" == candidateType
|
||||
|| "prefix-root" == candidateType
|
||||
|
@ -1534,21 +1551,24 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
leftover = 2;
|
||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2,
|
||||
fontSize);
|
||||
else
|
||||
translitBuffer.append(acip1 + '-' + acip2);
|
||||
translitBuffer.append(acip1 + '-' + acip2,
|
||||
fontSize);
|
||||
else
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + wylie2);
|
||||
translitBuffer.append(wylie1 + wylie2, fontSize);
|
||||
else
|
||||
translitBuffer.append(acip1 + acip2);
|
||||
translitBuffer.append(acip1 + acip2, fontSize);
|
||||
|
||||
if (((TGCPair)gcs.get(1)).classification
|
||||
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
||||
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||
== ((TGCPair)gcs.get(1)).classification);
|
||||
if (debug) System.out.println("DEBUG: appending vowel");
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2),
|
||||
fontSize);
|
||||
} else {
|
||||
if (debug) System.out.println("DEBUG: already has vowel 1");
|
||||
}
|
||||
|
@ -1562,7 +1582,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
wylie3,
|
||||
wylie4,
|
||||
acip3,
|
||||
acip4));
|
||||
acip4),
|
||||
fontSize);
|
||||
}
|
||||
} else if ("number" == candidateType) {
|
||||
leftover = 0;
|
||||
|
@ -1577,10 +1598,12 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
lastPairTranslit = (EWTSNotACIP
|
||||
? tp.getWylie(null)
|
||||
: tp.getACIP(null));
|
||||
if (!translitBuffer.toString().endsWith(lastPairTranslit)) {
|
||||
if ((translitBuffer.length() == 0)
|
||||
|| !translitBuffer.get(translitBuffer.length() - 1).getTranslit().endsWith(lastPairTranslit)) {
|
||||
int l;
|
||||
if ((l = translitBuffer.length()) > 0) {
|
||||
char lc = translitBuffer.charAt(l - 1);
|
||||
String s = translitBuffer.get(l - 1).getTranslit();
|
||||
char lc = s.charAt(s.length() - 1);
|
||||
ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */);
|
||||
lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */
|
||||
} else {
|
||||
|
@ -1594,7 +1617,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
String y;
|
||||
translitBuffer.append(EWTSNotACIP
|
||||
? (y = tp.getWylie(lastPairTranslit))
|
||||
: (y = tp.getACIP(lastPairTranslit)));
|
||||
: (y = tp.getACIP(lastPairTranslit)),
|
||||
fontSize);
|
||||
if (appendaged)
|
||||
lastPairTranslit = y;
|
||||
}
|
||||
|
@ -1619,23 +1643,23 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||
* @param warnings either null or a buffer to which will be appended
|
||||
* warnings about illegal tsheg bars
|
||||
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
|
||||
* null */
|
||||
private static String getTranslitImplementation(boolean EWTSNotACIP,
|
||||
DuffCode[] dcs,
|
||||
boolean noSuch[],
|
||||
StringBuffer warnings) {
|
||||
* @return the Extended Wylie/ACIP corresponding to these glyphs (with
|
||||
* font size info), or null */
|
||||
private static TranslitList getTranslitImplementation(boolean EWTSNotACIP,
|
||||
SizedDuffCode[] dcs,
|
||||
boolean noSuch[],
|
||||
StringBuffer warnings) {
|
||||
if (dcs.length == 0)
|
||||
return null;
|
||||
|
||||
ArrayList glyphList = new ArrayList();
|
||||
StringBuffer translitBuffer = new StringBuffer();
|
||||
TranslitList translitBuffer = new TranslitList();
|
||||
|
||||
// DLC FIXME: " " should become " " for ACIP
|
||||
for (int i=0; i<dcs.length; i++) {
|
||||
char ch = dcs[i].getCharacter();
|
||||
int k = dcs[i].getCharNum();
|
||||
// int fontNum = dcs[i].getFontNum();
|
||||
for (int i = 0; i < dcs.length; i++) {
|
||||
char ch = dcs[i].getDuffCode().getCharacter();
|
||||
int k = dcs[i].getDuffCode().getCharNum();
|
||||
int fsz = dcs[i].getFontSize();
|
||||
|
||||
if (k < 32) {
|
||||
if (!glyphList.isEmpty()) {
|
||||
|
@ -1649,14 +1673,17 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
|
||||
// indicates a real line break.
|
||||
if (!EWTSNotACIP && '\n' == ch) {
|
||||
if (i > 0 && dcs[i - 1].getCharacter() == '\r')
|
||||
translitBuffer.append("\r\n");
|
||||
if (i > 0
|
||||
&& dcs[i - 1].getDuffCode().getCharacter() == '\r')
|
||||
translitBuffer.append("\r\n", fsz);
|
||||
else
|
||||
translitBuffer.append(ch);
|
||||
translitBuffer.append(ch, fsz);
|
||||
}
|
||||
translitBuffer.append(ch);
|
||||
translitBuffer.append(ch, fsz);
|
||||
} else {
|
||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
|
||||
String wylie
|
||||
= TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(),
|
||||
noSuch);
|
||||
String acip = null;
|
||||
if (!EWTSNotACIP) {
|
||||
// U+0F04 and U+0F05 -- these require lookahead to
|
||||
|
@ -1665,12 +1692,12 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
int howManyConsumed[] = new int[] { -1 /* invalid */ };
|
||||
|
||||
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i],
|
||||
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(),
|
||||
((i+1<dcs.length)
|
||||
? dcs[i+1]
|
||||
? dcs[i+1].getDuffCode()
|
||||
: null),
|
||||
((i+2<dcs.length)
|
||||
? dcs[i+2]
|
||||
? dcs[i+2].getDuffCode()
|
||||
: null),
|
||||
noSuch,
|
||||
howManyConsumed);
|
||||
|
@ -1690,7 +1717,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
warnings, translitBuffer);
|
||||
glyphList.clear();
|
||||
}
|
||||
translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation
|
||||
//append the punctuation:
|
||||
translitBuffer.append(EWTSNotACIP ? wylie : acip, fsz);
|
||||
} else {
|
||||
glyphList.add(dcs[i]);
|
||||
}
|
||||
|
@ -1708,10 +1736,10 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
}
|
||||
|
||||
if (translitBuffer.length() > 0) {
|
||||
return translitBuffer.toString();
|
||||
}
|
||||
else
|
||||
return translitBuffer;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/** Returns "root" instead of "appendaged-root", for example. */
|
||||
|
|
|
@ -382,69 +382,69 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
return getTranslit(false, begin, end, noSuchACIP);
|
||||
}
|
||||
|
||||
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
|
||||
AttributeSet attr;
|
||||
String fontName;
|
||||
int fontNum;
|
||||
DuffCode dc;
|
||||
char ch;
|
||||
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
|
||||
AttributeSet attr;
|
||||
String fontName;
|
||||
int fontNum;
|
||||
char ch;
|
||||
|
||||
if (begin >= end)
|
||||
return "";
|
||||
if (begin >= end)
|
||||
return "";
|
||||
|
||||
java.util.List dcs = new ArrayList();
|
||||
int i = begin;
|
||||
StringBuffer translitBuffer = new StringBuffer();
|
||||
java.util.List dcs = new ArrayList();
|
||||
int i = begin;
|
||||
TranslitList translitBuffer = new TranslitList();
|
||||
|
||||
try {
|
||||
while (i < end) {
|
||||
attr = getCharacterElement(i).getAttributes();
|
||||
fontName = StyleConstants.getFontFamily(attr);
|
||||
try {
|
||||
while (i < end) {
|
||||
attr = getCharacterElement(i).getAttributes();
|
||||
fontName = StyleConstants.getFontFamily(attr);
|
||||
int fsz
|
||||
= ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue();
|
||||
|
||||
ch = getText(i,1).charAt(0);
|
||||
ch = getText(i,1).charAt(0);
|
||||
|
||||
//current character is formatting
|
||||
if (ch == '\n' || ch == '\t') {
|
||||
if (dcs.size() > 0) {
|
||||
DuffCode[] dc_array = new DuffCode[0];
|
||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
dcs.clear();
|
||||
}
|
||||
translitBuffer.append(ch);
|
||||
}
|
||||
//current character is formatting
|
||||
if (ch == '\n' || ch == '\t') {
|
||||
if (dcs.size() > 0) {
|
||||
SizedDuffCode[] dc_array
|
||||
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
dcs.clear();
|
||||
}
|
||||
translitBuffer.append(ch, fsz);
|
||||
}
|
||||
//current character isn't TMW
|
||||
else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) {
|
||||
if (dcs.size() > 0) {
|
||||
SizedDuffCode[] dc_array
|
||||
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
dcs.clear();
|
||||
}
|
||||
}
|
||||
//current character is convertable
|
||||
else {
|
||||
dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch), fsz));
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (dcs.size() > 0) {
|
||||
SizedDuffCode[] dc_array
|
||||
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP,
|
||||
dc_array,
|
||||
noSuch));
|
||||
}
|
||||
return translitBuffer.getString();
|
||||
}
|
||||
catch (BadLocationException ble) {
|
||||
ble.printStackTrace();
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
|
||||
//current character isn't TMW
|
||||
else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) {
|
||||
if (dcs.size() > 0) {
|
||||
DuffCode[] dc_array = new DuffCode[0];
|
||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
dcs.clear();
|
||||
}
|
||||
}
|
||||
|
||||
//current character is convertable
|
||||
else {
|
||||
dc = new DuffCode(fontNum, ch);
|
||||
dcs.add(dc);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
if (dcs.size() > 0) {
|
||||
DuffCode[] dc_array = new DuffCode[0];
|
||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
}
|
||||
return translitBuffer.toString();
|
||||
}
|
||||
catch (BadLocationException ble) {
|
||||
ble.printStackTrace();
|
||||
ThdlDebug.noteIffyCode();
|
||||
}
|
||||
|
||||
return "";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
/** Prints to standard output a list of all the indices of
|
||||
characters that are not in a TMW font within the range [start,
|
||||
|
@ -1202,8 +1202,6 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
|
||||
try {
|
||||
boolean noSuchWylie[] = new boolean[] { false };
|
||||
DuffCode[] any_dc_array = new DuffCode[0];
|
||||
DuffCode[] dc_array;
|
||||
Position endPos = createPosition(end);
|
||||
int i = start;
|
||||
java.util.List dcs = new ArrayList();
|
||||
|
@ -1213,39 +1211,46 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
= getCharacterElement(i).getAttributes();
|
||||
String fontName = StyleConstants.getFontFamily(attr);
|
||||
int fontNum;
|
||||
int iFontSize = 72; /* the failure ought to be obvious
|
||||
at this size */
|
||||
try {
|
||||
iFontSize
|
||||
= ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue();
|
||||
} catch (Exception e) {
|
||||
// leave it as 72
|
||||
}
|
||||
|
||||
if ((0 == (fontNum
|
||||
= TibetanMachineWeb.getTMWFontNumber(fontName)))
|
||||
|| i==endPos.getOffset()) {
|
||||
if (i != start) {
|
||||
dc_array = (DuffCode[])dcs.toArray(any_dc_array);
|
||||
|
||||
/* Low-priority FIXME: If the font size
|
||||
changes within a tsheg bar, the roman
|
||||
output will not mimic such changes. */
|
||||
|
||||
// SPEED_FIXME: determining font size might be slow
|
||||
int fontSize = 72; /* the failure ought to be
|
||||
obvious at this size */
|
||||
try {
|
||||
fontSize = ((Integer)getCharacterElement(start).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
|
||||
} catch (Exception e) {
|
||||
// leave it as 72
|
||||
}
|
||||
SizedDuffCode[] sdc_array
|
||||
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||
|
||||
remove(start, i-start);
|
||||
ThdlDebug.verify(getRomanAttributeSet() != null);
|
||||
insertString(start,
|
||||
TibTextUtils.getTranslit(EWTSNotACIP,
|
||||
dc_array,
|
||||
noSuchWylie),
|
||||
getCopyOfRomanAttributeSet(fontSize));
|
||||
TranslitList tb
|
||||
= TibTextUtils.getTranslit(EWTSNotACIP,
|
||||
sdc_array,
|
||||
noSuchWylie);
|
||||
int lastFontSize = -1;
|
||||
for (int j = 0; j < tb.length(); j++) {
|
||||
TranslitTuple tt = tb.get(j);
|
||||
int thisFontSize;
|
||||
insertString(start,
|
||||
tt.getTranslit(),
|
||||
getCopyOfRomanAttributeSet(thisFontSize = tt.getFontSize()));
|
||||
if (thisFontSize == lastFontSize)
|
||||
throw new Error("FIXME: make this an assertion");
|
||||
lastFontSize = thisFontSize;
|
||||
}
|
||||
dcs.clear();
|
||||
}
|
||||
start = i+1;
|
||||
} else {
|
||||
char ch = getText(i,1).charAt(0);
|
||||
dcs.add(new DuffCode(fontNum, ch));
|
||||
dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch),
|
||||
iFontSize));
|
||||
++numAttemptedReplacements[0];
|
||||
}
|
||||
|
||||
|
|
109
source/org/thdl/tib/text/TranslitList.java
Normal file
109
source/org/thdl/tib/text/TranslitList.java
Normal file
|
@ -0,0 +1,109 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
* A mutable representation of Roman transliteration with font size
|
||||
* information for each character of transliteration.
|
||||
*
|
||||
* @author David Chandler */
|
||||
class TranslitList {
|
||||
/** Invariant: For all 0<=i<length(),
|
||||
((TranslitTuple)vec.get(i)).getFontSize() !=
|
||||
((TranslitTuple)vec.get(i+1)).getFontSize(). */
|
||||
private final Vector /* of TranslitTuple */ vec;
|
||||
|
||||
/** Creates an empty list. */
|
||||
public TranslitList() {
|
||||
this.vec = new Vector();
|
||||
}
|
||||
|
||||
/** Returns the number of TranslitTuples in this list. */
|
||||
public int length() {
|
||||
return vec.size();
|
||||
}
|
||||
|
||||
public TranslitTuple get(int i) throws ArrayIndexOutOfBoundsException {
|
||||
return (TranslitTuple)vec.get(i);
|
||||
}
|
||||
|
||||
/** Appends to the end of this list a single character of Roman
|
||||
transliteration with the given font size. The last element of
|
||||
this list will have s appended to it if font sizes are the
|
||||
same; otherwise this list grows by an element. */
|
||||
public void append(char ch, int fontSize) {
|
||||
append(new String(new char[] { ch }), fontSize);
|
||||
}
|
||||
|
||||
/** Appends to the end of this list a stretch s of Roman
|
||||
transliteration that has font size fontSize. The last element
|
||||
of this list will have s appended to it if font sizes are the
|
||||
same; otherwise this list grows by an element. */
|
||||
public void append(String s, int fontSize) {
|
||||
if (vec.isEmpty()) {
|
||||
vec.add(new TranslitTuple(s, fontSize));
|
||||
} else {
|
||||
TranslitTuple tt = (TranslitTuple)vec.lastElement();
|
||||
TranslitTuple newtt
|
||||
= tt.getPossiblyCombinedTranslitTuple(s, fontSize);
|
||||
if (tt != newtt)
|
||||
vec.add(newtt);
|
||||
}
|
||||
}
|
||||
|
||||
/** Appends to the end of this list another TranslitList. The
|
||||
length of this list may or may not increase; the first element
|
||||
of tb and the last element of this list will be merged if
|
||||
their font sizes are the same. */
|
||||
public void append(TranslitList tb) {
|
||||
if (this == tb)
|
||||
throw new IllegalArgumentException("Cannot be this list, that would be bad!");
|
||||
if (this.vec.isEmpty() || tb.vec.isEmpty()) {
|
||||
this.vec.addAll(tb.vec);
|
||||
} else {
|
||||
int lbefore = this.length();
|
||||
this.vec.addAll(tb.vec);
|
||||
if (((TranslitTuple)tb.vec.firstElement()).getFontSize()
|
||||
== ((TranslitTuple)this.vec.lastElement()).getFontSize()) {
|
||||
// merge stretches with the same font size.
|
||||
TranslitTuple a = (TranslitTuple)this.vec.remove(lbefore-1);
|
||||
this.vec.set(lbefore-1,
|
||||
a.getPossiblyCombinedTranslitTuple(((TranslitTuple)this.vec.get(lbefore-1)).getTranslit(),
|
||||
((TranslitTuple)this.vec.get(lbefore-1)).getFontSize()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Do not call this -- it throws an error. */
|
||||
public String toString() {
|
||||
throw new Error("There was a bug where this was called, so don't call this.");
|
||||
}
|
||||
|
||||
/** Returns the full Roman transliteration. You don't get font
|
||||
size information this way, of course. */
|
||||
public String getString() {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (int i = 0; i < length(); i++) {
|
||||
sb.append(get(i).getTranslit());
|
||||
}
|
||||
return sb.toString();
|
||||
}
|
||||
}
|
61
source/org/thdl/tib/text/TranslitTuple.java
Normal file
61
source/org/thdl/tib/text/TranslitTuple.java
Normal file
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
The contents of this file are subject to the THDL Open Community License
|
||||
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License on the THDL web site
|
||||
(http://www.thdl.org/).
|
||||
|
||||
Software distributed under the License is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||
License for the specific terms governing rights and limitations under the
|
||||
License.
|
||||
|
||||
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
|
||||
All Rights Reserved.
|
||||
|
||||
Contributor(s): ______________________________________.
|
||||
*/
|
||||
|
||||
package org.thdl.tib.text;
|
||||
|
||||
import java.util.Vector;
|
||||
|
||||
/**
|
||||
* A stretch of Roman transliteration all in a certain font size.
|
||||
*
|
||||
* @author David Chandler */
|
||||
class TranslitTuple {
    /** Accumulates the Roman transliteration of this stretch. */
    private final StringBuffer text;

    /** The one font size that applies to everything in text. */
    private final int fontSize;

    /** Creates a TranslitTuple holding the transliteration s, all of
        which has font size sz. */
    public TranslitTuple(String s, int sz) {
        this.text = new StringBuffer(s);
        this.fontSize = sz;
        // FIXME: assert(s.length() > 0);
    }

    /** Folds s into this tuple when possible.  If sz, the font size
        of s, equals this tuple's font size, then s is appended to
        this tuple and this tuple is returned.  Otherwise this tuple
        is left alone and a new tuple holding just s at size sz is
        returned. */
    public TranslitTuple getPossiblyCombinedTranslitTuple(String s, int sz) {
        if (fontSize != sz) {
            return new TranslitTuple(s, sz);
        }
        text.append(s);
        return this;
    }

    /** Returns the Roman transliteration accumulated so far. */
    public String getTranslit() {
        return text.toString();
    }

    /** Returns the font size of this stretch of transliteration. */
    public int getFontSize() {
        return fontSize;
    }

    /** Do not call this -- it throws an error.  Use {@link
        #getTranslit()} instead. */
    public String toString() {
        throw new Error("There was a bug where this was called, so don't call this.");
    }
}
|
Loading…
Reference in a new issue