I thought my earlier commit preserved font size info for TMW->ACIP/Wylie
conversions. It was only at a very coarse level. The feature is now truly here.
This commit is contained in:
parent
8ccf57dccb
commit
14fb449f95
6 changed files with 383 additions and 142 deletions
|
@ -36,17 +36,17 @@ import org.thdl.util.ThdlDebug;
|
||||||
* @author Edward Garrett, Tibetan and Himalayan Digital Library
|
* @author Edward Garrett, Tibetan and Himalayan Digital Library
|
||||||
* @author David Chandler */
|
* @author David Chandler */
|
||||||
|
|
||||||
public final class DuffCode {
|
public final /* immutable */ class DuffCode {
|
||||||
/**
|
/**
|
||||||
* the font number in which this glyph can be found, from 1
|
* the font number in which this glyph can be found, from 1
|
||||||
* (TibetanMachineWeb/TibetanMachine) ... to 5
|
* (TibetanMachineWeb/TibetanMachine) ... to 5
|
||||||
* (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10
|
* (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10
|
||||||
* (TibetanMachineWeb9/[Invalid for TM family]). */
|
* (TibetanMachineWeb9/[Invalid for TM family]). */
|
||||||
private byte fontNum;
|
private /* final if the compiler were smarter */ byte fontNum;
|
||||||
/**
|
/**
|
||||||
* the character value of this glyph, as an integer (that is, ordinal)
|
* the character value of this glyph, as an integer (that is, ordinal)
|
||||||
*/
|
*/
|
||||||
private byte charNum;
|
private /* final if the compiler were smarter */ byte charNum;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Called by {@link TibetanMachineWeb} to generate
|
* Called by {@link TibetanMachineWeb} to generate
|
||||||
|
|
38
source/org/thdl/tib/text/SizedDuffCode.java
Normal file
38
source/org/thdl/tib/text/SizedDuffCode.java
Normal file
|
@ -0,0 +1,38 @@
|
||||||
|
/*
|
||||||
|
The contents of this file are subject to the THDL Open Community License
|
||||||
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License on the THDL web site
|
||||||
|
(http://www.thdl.org/).
|
||||||
|
|
||||||
|
Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||||
|
License for the specific terms governing rights and limitations under the
|
||||||
|
License.
|
||||||
|
|
||||||
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||||
|
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
|
||||||
|
All Rights Reserved.
|
||||||
|
|
||||||
|
Contributor(s): ______________________________________.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.thdl.tib.text;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* An immutable representation of a Tibetan glyph of a certain size in
|
||||||
|
* the TibetanMachineWeb or TibetanMachine families of fonts.
|
||||||
|
*
|
||||||
|
* <p>A SizedDuffCode is a pair of a font size and a {@link
|
||||||
|
* DuffCode}.</p>
|
||||||
|
*
|
||||||
|
* @author David Chandler */
|
||||||
|
final /* immutable */ class SizedDuffCode {
|
||||||
|
private final DuffCode dc;
|
||||||
|
private final int fontSize;
|
||||||
|
public SizedDuffCode(DuffCode dc, int fontSize) {
|
||||||
|
this.dc = dc;
|
||||||
|
this.fontSize = fontSize;
|
||||||
|
}
|
||||||
|
public DuffCode getDuffCode() { return dc; }
|
||||||
|
public int getFontSize() { return fontSize; }
|
||||||
|
}
|
|
@ -936,13 +936,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
* @param noSuch an array which will not be touched if this is
|
* @param noSuch an array which will not be touched if this is
|
||||||
* successful; however, if there is no THDL Extended Wylie/ACIP
|
* successful; however, if there is no THDL Extended Wylie/ACIP
|
||||||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||||
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
|
* @return the Extended Wylie/ACIP corresponding to these glyphs (with
|
||||||
* null */
|
* font size info), or null */
|
||||||
public static String getTranslit(boolean EWTSNotACIP,
|
public static TranslitList getTranslit(boolean EWTSNotACIP,
|
||||||
DuffCode[] dcs,
|
SizedDuffCode[] dcs,
|
||||||
boolean noSuch[]) {
|
boolean noSuch[]) {
|
||||||
StringBuffer warnings = (debug ? new StringBuffer() : null);
|
StringBuffer warnings = (debug ? new StringBuffer() : null);
|
||||||
String ans
|
TranslitList ans
|
||||||
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
|
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
|
||||||
if (debug && warnings.length() > 0)
|
if (debug && warnings.length() > 0)
|
||||||
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
|
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
|
||||||
|
@ -985,7 +985,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
int pairType = TGCPair.TYPE_OTHER;
|
int pairType = TGCPair.TYPE_OTHER;
|
||||||
|
|
||||||
for (int i = 0; i < sz; i++) {
|
for (int i = 0; i < sz; i++) {
|
||||||
DuffCode dc = (DuffCode)glyphList.get(i);
|
DuffCode dc = ((SizedDuffCode)glyphList.get(i)).getDuffCode();
|
||||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
|
String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
|
||||||
boolean buildingUpSanskritNext = false;
|
boolean buildingUpSanskritNext = false;
|
||||||
if ((buildingUpSanskritNext
|
if ((buildingUpSanskritNext
|
||||||
|
@ -1314,12 +1314,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Appends to translitBuffer the EWTS/ACIP for the glyph list
|
/** Appends to translitBuffer the EWTS/ACIP for the glyph list
|
||||||
glyphList (which should be an ArrayList for speed). This will
|
glyphList (which should be an ArrayList for speed). The font
|
||||||
be very user-friendly for "legal tsheg bars" and will be
|
size of the transliteration will be fontSize. The
|
||||||
valid, but possibly ugly (interspersed with disambiguators or
|
transliteration will be very user-friendly for "legal tsheg
|
||||||
extra vowels, etc.) Wylie/ACIP for other things, such as
|
bars" and will be valid, but possibly ugly (interspersed with
|
||||||
Sanskrit transliteration. Updates warnings and noSuch like
|
disambiguators or extra vowels, etc.) Wylie/ACIP for other
|
||||||
the caller does.
|
things, such as Sanskrit transliteration. Updates warnings
|
||||||
|
and noSuch like the caller does.
|
||||||
|
|
||||||
<p>What constitutes a legal, non-punctuation, non-whitespace
|
<p>What constitutes a legal, non-punctuation, non-whitespace
|
||||||
tsheg bar? The following are the only such:</p>
|
tsheg bar? The following are the only such:</p>
|
||||||
|
@ -1366,7 +1367,10 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
java.util.List glyphList,
|
java.util.List glyphList,
|
||||||
boolean noSuch[],
|
boolean noSuch[],
|
||||||
StringBuffer warnings,
|
StringBuffer warnings,
|
||||||
StringBuffer translitBuffer) {
|
TranslitList translitBuffer) {
|
||||||
|
// FIXME: If font size changes within a tsheg-bar, we don't
|
||||||
|
// handle that.
|
||||||
|
int fontSize = ((SizedDuffCode)glyphList.get(0)).getFontSize();
|
||||||
TGCList gcs
|
TGCList gcs
|
||||||
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
|
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
|
||||||
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
|
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
|
||||||
|
@ -1397,16 +1401,18 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
// and a.u and a.i, we always do it (see Rule 10
|
// and a.u and a.i, we always do it (see Rule 10
|
||||||
// of the September 1, 2003 draft of EWTS
|
// of the September 1, 2003 draft of EWTS
|
||||||
// standard).
|
// standard).
|
||||||
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY);
|
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY, fontSize);
|
||||||
}
|
}
|
||||||
translitBuffer.append(translit);
|
translitBuffer.append(translit, fontSize);
|
||||||
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|
||||||
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
|
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
|
||||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
|
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie), fontSize);
|
||||||
} else if (i + 1 < sz) {
|
} else if (i + 1 < sz) {
|
||||||
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
|
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
|
||||||
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
|
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
|
||||||
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
|
translitBuffer.append(EWTSNotACIP
|
||||||
|
? WYLIE_DISAMBIGUATING_KEY : '-',
|
||||||
|
fontSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -1465,17 +1471,24 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
|
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
||||||
if (EWTSNotACIP)
|
if (EWTSNotACIP)
|
||||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
translitBuffer.append(wylie1
|
||||||
|
+ WYLIE_DISAMBIGUATING_KEY
|
||||||
|
+ wylie2,
|
||||||
|
fontSize);
|
||||||
else
|
else
|
||||||
translitBuffer.append(acip1 + '-' + acip2);
|
translitBuffer.append(acip1 + '-' + acip2,
|
||||||
|
fontSize);
|
||||||
else
|
else
|
||||||
if (EWTSNotACIP)
|
if (EWTSNotACIP)
|
||||||
translitBuffer.append(wylie1 + wylie2);
|
translitBuffer.append(wylie1 + wylie2,
|
||||||
|
fontSize);
|
||||||
else
|
else
|
||||||
translitBuffer.append(acip1 + acip2);
|
translitBuffer.append(acip1 + acip2,
|
||||||
|
fontSize);
|
||||||
|
|
||||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
|
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
|
||||||
+ (EWTSNotACIP ? wylie3 : acip3));
|
+ (EWTSNotACIP ? wylie3 : acip3),
|
||||||
|
fontSize);
|
||||||
} else {
|
} else {
|
||||||
if (EWTSNotACIP)
|
if (EWTSNotACIP)
|
||||||
translitBuffer.append(wylie1
|
translitBuffer.append(wylie1
|
||||||
|
@ -1484,7 +1497,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
wylie2,
|
wylie2,
|
||||||
wylie3,
|
wylie3,
|
||||||
acip2,
|
acip2,
|
||||||
acip3));
|
acip3),
|
||||||
|
fontSize);
|
||||||
else
|
else
|
||||||
translitBuffer.append(acip1
|
translitBuffer.append(acip1
|
||||||
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
|
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
|
||||||
|
@ -1492,7 +1506,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
wylie2,
|
wylie2,
|
||||||
wylie3,
|
wylie3,
|
||||||
acip2,
|
acip2,
|
||||||
acip3));
|
acip3),
|
||||||
|
fontSize);
|
||||||
}
|
}
|
||||||
} else if ("root" == candidateType
|
} else if ("root" == candidateType
|
||||||
|| "prefix/root-root/suffix" == candidateType
|
|| "prefix/root-root/suffix" == candidateType
|
||||||
|
@ -1502,12 +1517,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
||||||
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
|
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
|
||||||
leftover = 1;
|
leftover = 1;
|
||||||
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
|
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1, fontSize);
|
||||||
if (((TGCPair)gcs.get(0)).classification
|
if (((TGCPair)gcs.get(0)).classification
|
||||||
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
||||||
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||||
== ((TGCPair)gcs.get(0)).classification);
|
== ((TGCPair)gcs.get(0)).classification);
|
||||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
|
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1),
|
||||||
|
fontSize);
|
||||||
if (debug) System.out.println("DEBUG: appending vowel");
|
if (debug) System.out.println("DEBUG: appending vowel");
|
||||||
} else {
|
} else {
|
||||||
if (debug) System.out.println("DEBUG: already has vowel 2");
|
if (debug) System.out.println("DEBUG: already has vowel 2");
|
||||||
|
@ -1522,7 +1538,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
wylie2,
|
wylie2,
|
||||||
wylie3,
|
wylie3,
|
||||||
acip2,
|
acip2,
|
||||||
acip3));
|
acip3),
|
||||||
|
fontSize);
|
||||||
}
|
}
|
||||||
} else if ("prefix-root-suffix" == candidateType
|
} else if ("prefix-root-suffix" == candidateType
|
||||||
|| "prefix-root" == candidateType
|
|| "prefix-root" == candidateType
|
||||||
|
@ -1534,21 +1551,24 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
leftover = 2;
|
leftover = 2;
|
||||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
||||||
if (EWTSNotACIP)
|
if (EWTSNotACIP)
|
||||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2,
|
||||||
|
fontSize);
|
||||||
else
|
else
|
||||||
translitBuffer.append(acip1 + '-' + acip2);
|
translitBuffer.append(acip1 + '-' + acip2,
|
||||||
|
fontSize);
|
||||||
else
|
else
|
||||||
if (EWTSNotACIP)
|
if (EWTSNotACIP)
|
||||||
translitBuffer.append(wylie1 + wylie2);
|
translitBuffer.append(wylie1 + wylie2, fontSize);
|
||||||
else
|
else
|
||||||
translitBuffer.append(acip1 + acip2);
|
translitBuffer.append(acip1 + acip2, fontSize);
|
||||||
|
|
||||||
if (((TGCPair)gcs.get(1)).classification
|
if (((TGCPair)gcs.get(1)).classification
|
||||||
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
||||||
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||||
== ((TGCPair)gcs.get(1)).classification);
|
== ((TGCPair)gcs.get(1)).classification);
|
||||||
if (debug) System.out.println("DEBUG: appending vowel");
|
if (debug) System.out.println("DEBUG: appending vowel");
|
||||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
|
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2),
|
||||||
|
fontSize);
|
||||||
} else {
|
} else {
|
||||||
if (debug) System.out.println("DEBUG: already has vowel 1");
|
if (debug) System.out.println("DEBUG: already has vowel 1");
|
||||||
}
|
}
|
||||||
|
@ -1562,7 +1582,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
wylie3,
|
wylie3,
|
||||||
wylie4,
|
wylie4,
|
||||||
acip3,
|
acip3,
|
||||||
acip4));
|
acip4),
|
||||||
|
fontSize);
|
||||||
}
|
}
|
||||||
} else if ("number" == candidateType) {
|
} else if ("number" == candidateType) {
|
||||||
leftover = 0;
|
leftover = 0;
|
||||||
|
@ -1577,10 +1598,12 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
lastPairTranslit = (EWTSNotACIP
|
lastPairTranslit = (EWTSNotACIP
|
||||||
? tp.getWylie(null)
|
? tp.getWylie(null)
|
||||||
: tp.getACIP(null));
|
: tp.getACIP(null));
|
||||||
if (!translitBuffer.toString().endsWith(lastPairTranslit)) {
|
if ((translitBuffer.length() == 0)
|
||||||
|
|| !translitBuffer.get(translitBuffer.length() - 1).getTranslit().endsWith(lastPairTranslit)) {
|
||||||
int l;
|
int l;
|
||||||
if ((l = translitBuffer.length()) > 0) {
|
if ((l = translitBuffer.length()) > 0) {
|
||||||
char lc = translitBuffer.charAt(l - 1);
|
String s = translitBuffer.get(l - 1).getTranslit();
|
||||||
|
char lc = s.charAt(s.length() - 1);
|
||||||
ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */);
|
ThdlDebug.verify(lc == ((EWTSNotACIP) ? 'a' : 'A') /* hard-coded ACIP and EWTS values */);
|
||||||
lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */
|
lastPairTranslit = lastPairTranslit + lc; /* 'da'i can cause this */
|
||||||
} else {
|
} else {
|
||||||
|
@ -1594,7 +1617,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
String y;
|
String y;
|
||||||
translitBuffer.append(EWTSNotACIP
|
translitBuffer.append(EWTSNotACIP
|
||||||
? (y = tp.getWylie(lastPairTranslit))
|
? (y = tp.getWylie(lastPairTranslit))
|
||||||
: (y = tp.getACIP(lastPairTranslit)));
|
: (y = tp.getACIP(lastPairTranslit)),
|
||||||
|
fontSize);
|
||||||
if (appendaged)
|
if (appendaged)
|
||||||
lastPairTranslit = y;
|
lastPairTranslit = y;
|
||||||
}
|
}
|
||||||
|
@ -1619,23 +1643,23 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||||
* @param warnings either null or a buffer to which will be appended
|
* @param warnings either null or a buffer to which will be appended
|
||||||
* warnings about illegal tsheg bars
|
* warnings about illegal tsheg bars
|
||||||
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
|
* @return the Extended Wylie/ACIP corresponding to these glyphs (with
|
||||||
* null */
|
* font size info), or null */
|
||||||
private static String getTranslitImplementation(boolean EWTSNotACIP,
|
private static TranslitList getTranslitImplementation(boolean EWTSNotACIP,
|
||||||
DuffCode[] dcs,
|
SizedDuffCode[] dcs,
|
||||||
boolean noSuch[],
|
boolean noSuch[],
|
||||||
StringBuffer warnings) {
|
StringBuffer warnings) {
|
||||||
if (dcs.length == 0)
|
if (dcs.length == 0)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
ArrayList glyphList = new ArrayList();
|
ArrayList glyphList = new ArrayList();
|
||||||
StringBuffer translitBuffer = new StringBuffer();
|
TranslitList translitBuffer = new TranslitList();
|
||||||
|
|
||||||
// DLC FIXME: " " should become " " for ACIP
|
// DLC FIXME: " " should become " " for ACIP
|
||||||
for (int i=0; i<dcs.length; i++) {
|
for (int i = 0; i < dcs.length; i++) {
|
||||||
char ch = dcs[i].getCharacter();
|
char ch = dcs[i].getDuffCode().getCharacter();
|
||||||
int k = dcs[i].getCharNum();
|
int k = dcs[i].getDuffCode().getCharNum();
|
||||||
// int fontNum = dcs[i].getFontNum();
|
int fsz = dcs[i].getFontSize();
|
||||||
|
|
||||||
if (k < 32) {
|
if (k < 32) {
|
||||||
if (!glyphList.isEmpty()) {
|
if (!glyphList.isEmpty()) {
|
||||||
|
@ -1649,14 +1673,17 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
|
// In ACIP, \n\n (or \r\n\r\n with DOS line feeds)
|
||||||
// indicates a real line break.
|
// indicates a real line break.
|
||||||
if (!EWTSNotACIP && '\n' == ch) {
|
if (!EWTSNotACIP && '\n' == ch) {
|
||||||
if (i > 0 && dcs[i - 1].getCharacter() == '\r')
|
if (i > 0
|
||||||
translitBuffer.append("\r\n");
|
&& dcs[i - 1].getDuffCode().getCharacter() == '\r')
|
||||||
|
translitBuffer.append("\r\n", fsz);
|
||||||
else
|
else
|
||||||
translitBuffer.append(ch);
|
translitBuffer.append(ch, fsz);
|
||||||
}
|
}
|
||||||
translitBuffer.append(ch);
|
translitBuffer.append(ch, fsz);
|
||||||
} else {
|
} else {
|
||||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
|
String wylie
|
||||||
|
= TibetanMachineWeb.getWylieForGlyph(dcs[i].getDuffCode(),
|
||||||
|
noSuch);
|
||||||
String acip = null;
|
String acip = null;
|
||||||
if (!EWTSNotACIP) {
|
if (!EWTSNotACIP) {
|
||||||
// U+0F04 and U+0F05 -- these require lookahead to
|
// U+0F04 and U+0F05 -- these require lookahead to
|
||||||
|
@ -1665,12 +1692,12 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
|
|
||||||
int howManyConsumed[] = new int[] { -1 /* invalid */ };
|
int howManyConsumed[] = new int[] { -1 /* invalid */ };
|
||||||
|
|
||||||
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i],
|
acip = TibetanMachineWeb.getACIPForGlyph(dcs[i].getDuffCode(),
|
||||||
((i+1<dcs.length)
|
((i+1<dcs.length)
|
||||||
? dcs[i+1]
|
? dcs[i+1].getDuffCode()
|
||||||
: null),
|
: null),
|
||||||
((i+2<dcs.length)
|
((i+2<dcs.length)
|
||||||
? dcs[i+2]
|
? dcs[i+2].getDuffCode()
|
||||||
: null),
|
: null),
|
||||||
noSuch,
|
noSuch,
|
||||||
howManyConsumed);
|
howManyConsumed);
|
||||||
|
@ -1690,7 +1717,8 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
warnings, translitBuffer);
|
warnings, translitBuffer);
|
||||||
glyphList.clear();
|
glyphList.clear();
|
||||||
}
|
}
|
||||||
translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation
|
//append the punctuation:
|
||||||
|
translitBuffer.append(EWTSNotACIP ? wylie : acip, fsz);
|
||||||
} else {
|
} else {
|
||||||
glyphList.add(dcs[i]);
|
glyphList.add(dcs[i]);
|
||||||
}
|
}
|
||||||
|
@ -1708,10 +1736,10 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (translitBuffer.length() > 0) {
|
if (translitBuffer.length() > 0) {
|
||||||
return translitBuffer.toString();
|
return translitBuffer;
|
||||||
}
|
} else {
|
||||||
else
|
|
||||||
return null;
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns "root" instead of "appendaged-root", for example. */
|
/** Returns "root" instead of "appendaged-root", for example. */
|
||||||
|
|
|
@ -382,69 +382,69 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
return getTranslit(false, begin, end, noSuchACIP);
|
return getTranslit(false, begin, end, noSuchACIP);
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
|
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
|
||||||
AttributeSet attr;
|
AttributeSet attr;
|
||||||
String fontName;
|
String fontName;
|
||||||
int fontNum;
|
int fontNum;
|
||||||
DuffCode dc;
|
char ch;
|
||||||
char ch;
|
|
||||||
|
|
||||||
if (begin >= end)
|
if (begin >= end)
|
||||||
return "";
|
return "";
|
||||||
|
|
||||||
java.util.List dcs = new ArrayList();
|
java.util.List dcs = new ArrayList();
|
||||||
int i = begin;
|
int i = begin;
|
||||||
StringBuffer translitBuffer = new StringBuffer();
|
TranslitList translitBuffer = new TranslitList();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
while (i < end) {
|
while (i < end) {
|
||||||
attr = getCharacterElement(i).getAttributes();
|
attr = getCharacterElement(i).getAttributes();
|
||||||
fontName = StyleConstants.getFontFamily(attr);
|
fontName = StyleConstants.getFontFamily(attr);
|
||||||
|
int fsz
|
||||||
|
= ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue();
|
||||||
|
|
||||||
ch = getText(i,1).charAt(0);
|
ch = getText(i,1).charAt(0);
|
||||||
|
|
||||||
//current character is formatting
|
//current character is formatting
|
||||||
if (ch == '\n' || ch == '\t') {
|
if (ch == '\n' || ch == '\t') {
|
||||||
if (dcs.size() > 0) {
|
if (dcs.size() > 0) {
|
||||||
DuffCode[] dc_array = new DuffCode[0];
|
SizedDuffCode[] dc_array
|
||||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||||
dcs.clear();
|
dcs.clear();
|
||||||
}
|
}
|
||||||
translitBuffer.append(ch);
|
translitBuffer.append(ch, fsz);
|
||||||
}
|
}
|
||||||
|
//current character isn't TMW
|
||||||
|
else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) {
|
||||||
|
if (dcs.size() > 0) {
|
||||||
|
SizedDuffCode[] dc_array
|
||||||
|
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||||
|
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||||
|
dcs.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
//current character is convertable
|
||||||
|
else {
|
||||||
|
dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch), fsz));
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
if (dcs.size() > 0) {
|
||||||
|
SizedDuffCode[] dc_array
|
||||||
|
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||||
|
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP,
|
||||||
|
dc_array,
|
||||||
|
noSuch));
|
||||||
|
}
|
||||||
|
return translitBuffer.getString();
|
||||||
|
}
|
||||||
|
catch (BadLocationException ble) {
|
||||||
|
ble.printStackTrace();
|
||||||
|
ThdlDebug.noteIffyCode();
|
||||||
|
}
|
||||||
|
|
||||||
//current character isn't TMW
|
return "";
|
||||||
else if ((0 == (fontNum = TibetanMachineWeb.getTMWFontNumber(fontName)))) {
|
}
|
||||||
if (dcs.size() > 0) {
|
|
||||||
DuffCode[] dc_array = new DuffCode[0];
|
|
||||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
|
||||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
|
||||||
dcs.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//current character is convertable
|
|
||||||
else {
|
|
||||||
dc = new DuffCode(fontNum, ch);
|
|
||||||
dcs.add(dc);
|
|
||||||
}
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
if (dcs.size() > 0) {
|
|
||||||
DuffCode[] dc_array = new DuffCode[0];
|
|
||||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
|
||||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
|
||||||
}
|
|
||||||
return translitBuffer.toString();
|
|
||||||
}
|
|
||||||
catch (BadLocationException ble) {
|
|
||||||
ble.printStackTrace();
|
|
||||||
ThdlDebug.noteIffyCode();
|
|
||||||
}
|
|
||||||
|
|
||||||
return "";
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Prints to standard output a list of all the indices of
|
/** Prints to standard output a list of all the indices of
|
||||||
characters that are not in a TMW font within the range [start,
|
characters that are not in a TMW font within the range [start,
|
||||||
|
@ -1202,8 +1202,6 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
|
|
||||||
try {
|
try {
|
||||||
boolean noSuchWylie[] = new boolean[] { false };
|
boolean noSuchWylie[] = new boolean[] { false };
|
||||||
DuffCode[] any_dc_array = new DuffCode[0];
|
|
||||||
DuffCode[] dc_array;
|
|
||||||
Position endPos = createPosition(end);
|
Position endPos = createPosition(end);
|
||||||
int i = start;
|
int i = start;
|
||||||
java.util.List dcs = new ArrayList();
|
java.util.List dcs = new ArrayList();
|
||||||
|
@ -1213,39 +1211,46 @@ public class TibetanDocument extends DefaultStyledDocument {
|
||||||
= getCharacterElement(i).getAttributes();
|
= getCharacterElement(i).getAttributes();
|
||||||
String fontName = StyleConstants.getFontFamily(attr);
|
String fontName = StyleConstants.getFontFamily(attr);
|
||||||
int fontNum;
|
int fontNum;
|
||||||
|
int iFontSize = 72; /* the failure ought to be obvious
|
||||||
|
at this size */
|
||||||
|
try {
|
||||||
|
iFontSize
|
||||||
|
= ((Integer)attr.getAttribute(StyleConstants.FontSize)).intValue();
|
||||||
|
} catch (Exception e) {
|
||||||
|
// leave it as 72
|
||||||
|
}
|
||||||
|
|
||||||
if ((0 == (fontNum
|
if ((0 == (fontNum
|
||||||
= TibetanMachineWeb.getTMWFontNumber(fontName)))
|
= TibetanMachineWeb.getTMWFontNumber(fontName)))
|
||||||
|| i==endPos.getOffset()) {
|
|| i==endPos.getOffset()) {
|
||||||
if (i != start) {
|
if (i != start) {
|
||||||
dc_array = (DuffCode[])dcs.toArray(any_dc_array);
|
SizedDuffCode[] sdc_array
|
||||||
|
= (SizedDuffCode[])dcs.toArray(new SizedDuffCode[0]);
|
||||||
/* Low-priority FIXME: If the font size
|
|
||||||
changes within a tsheg bar, the roman
|
|
||||||
output will not mimic such changes. */
|
|
||||||
|
|
||||||
// SPEED_FIXME: determining font size might be slow
|
|
||||||
int fontSize = 72; /* the failure ought to be
|
|
||||||
obvious at this size */
|
|
||||||
try {
|
|
||||||
fontSize = ((Integer)getCharacterElement(start).getAttributes().getAttribute(StyleConstants.FontSize)).intValue();
|
|
||||||
} catch (Exception e) {
|
|
||||||
// leave it as 72
|
|
||||||
}
|
|
||||||
|
|
||||||
remove(start, i-start);
|
remove(start, i-start);
|
||||||
ThdlDebug.verify(getRomanAttributeSet() != null);
|
ThdlDebug.verify(getRomanAttributeSet() != null);
|
||||||
insertString(start,
|
TranslitList tb
|
||||||
TibTextUtils.getTranslit(EWTSNotACIP,
|
= TibTextUtils.getTranslit(EWTSNotACIP,
|
||||||
dc_array,
|
sdc_array,
|
||||||
noSuchWylie),
|
noSuchWylie);
|
||||||
getCopyOfRomanAttributeSet(fontSize));
|
int lastFontSize = -1;
|
||||||
|
for (int j = 0; j < tb.length(); j++) {
|
||||||
|
TranslitTuple tt = tb.get(j);
|
||||||
|
int thisFontSize;
|
||||||
|
insertString(start,
|
||||||
|
tt.getTranslit(),
|
||||||
|
getCopyOfRomanAttributeSet(thisFontSize = tt.getFontSize()));
|
||||||
|
if (thisFontSize == lastFontSize)
|
||||||
|
throw new Error("FIXME: make this an assertion");
|
||||||
|
lastFontSize = thisFontSize;
|
||||||
|
}
|
||||||
dcs.clear();
|
dcs.clear();
|
||||||
}
|
}
|
||||||
start = i+1;
|
start = i+1;
|
||||||
} else {
|
} else {
|
||||||
char ch = getText(i,1).charAt(0);
|
char ch = getText(i,1).charAt(0);
|
||||||
dcs.add(new DuffCode(fontNum, ch));
|
dcs.add(new SizedDuffCode(new DuffCode(fontNum, ch),
|
||||||
|
iFontSize));
|
||||||
++numAttemptedReplacements[0];
|
++numAttemptedReplacements[0];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
109
source/org/thdl/tib/text/TranslitList.java
Normal file
109
source/org/thdl/tib/text/TranslitList.java
Normal file
|
@ -0,0 +1,109 @@
|
||||||
|
/*
|
||||||
|
The contents of this file are subject to the THDL Open Community License
|
||||||
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License on the THDL web site
|
||||||
|
(http://www.thdl.org/).
|
||||||
|
|
||||||
|
Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||||
|
License for the specific terms governing rights and limitations under the
|
||||||
|
License.
|
||||||
|
|
||||||
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||||
|
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
|
||||||
|
All Rights Reserved.
|
||||||
|
|
||||||
|
Contributor(s): ______________________________________.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.thdl.tib.text;
|
||||||
|
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A mutable representation of Roman transliteration with font size
|
||||||
|
* information for each character of transliteration.
|
||||||
|
*
|
||||||
|
* @author David Chandler */
|
||||||
|
class TranslitList {
|
||||||
|
/** Invariant: For all 0<=i<length()-1,
|
||||||
|
((TranslitTuple)vec.get(i)).getFontSize() !=
|
||||||
|
((TranslitTuple)vec.get(i+1)).getFontSize(). */
|
||||||
|
private final Vector /* of TranslitTuple */ vec;
|
||||||
|
|
||||||
|
/** Constructs a list that holds no TranslitTuples yet. */
public TranslitList() {
    vec = new Vector();
}
|
||||||
|
|
||||||
|
/** Returns the number of TranslitTuples in this list. */
|
||||||
|
public int length() {
|
||||||
|
return vec.size();
|
||||||
|
}
|
||||||
|
|
||||||
|
public TranslitTuple get(int i) throws ArrayIndexOutOfBoundsException {
|
||||||
|
return (TranslitTuple)vec.get(i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Appends to the end of this list a single character of Roman
|
||||||
|
transliteration with the given font size. The last element of
|
||||||
|
this list will have s appended to it if font sizes are the
|
||||||
|
same; otherwise this list grows by an element. */
|
||||||
|
public void append(char ch, int fontSize) {
|
||||||
|
append(new String(new char[] { ch }), fontSize);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Appends to the end of this list a stretch s of Roman
|
||||||
|
transliteration that has font size fontSize. The last element
|
||||||
|
of this list will have s appended to it if font sizes are the
|
||||||
|
same; otherwise this list grows by an element. */
|
||||||
|
public void append(String s, int fontSize) {
|
||||||
|
if (vec.isEmpty()) {
|
||||||
|
vec.add(new TranslitTuple(s, fontSize));
|
||||||
|
} else {
|
||||||
|
TranslitTuple tt = (TranslitTuple)vec.lastElement();
|
||||||
|
TranslitTuple newtt
|
||||||
|
= tt.getPossiblyCombinedTranslitTuple(s, fontSize);
|
||||||
|
if (tt != newtt)
|
||||||
|
vec.add(newtt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Appends to the end of this list another TranslitList. The
|
||||||
|
length of this list may or may not increase; the first element
|
||||||
|
of tb and the last element of this list will be merged if
|
||||||
|
their font sizes are the same. */
|
||||||
|
public void append(TranslitList tb) {
|
||||||
|
if (this == tb)
|
||||||
|
throw new IllegalArgumentException("Cannot be this list, that would be bad!");
|
||||||
|
if (this.vec.isEmpty() || tb.vec.isEmpty()) {
|
||||||
|
this.vec.addAll(tb.vec);
|
||||||
|
} else {
|
||||||
|
int lbefore = this.length();
|
||||||
|
this.vec.addAll(tb.vec);
|
||||||
|
if (((TranslitTuple)tb.vec.firstElement()).getFontSize()
|
||||||
|
== ((TranslitTuple)this.vec.lastElement()).getFontSize()) {
|
||||||
|
// merge stretches with the same font size.
|
||||||
|
TranslitTuple a = (TranslitTuple)this.vec.remove(lbefore-1);
|
||||||
|
this.vec.set(lbefore-1,
|
||||||
|
a.getPossiblyCombinedTranslitTuple(((TranslitTuple)this.vec.get(lbefore-1)).getTranslit(),
|
||||||
|
((TranslitTuple)this.vec.get(lbefore-1)).getFontSize()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Do not call this -- it throws an error. */
|
||||||
|
public String toString() {
|
||||||
|
throw new Error("There was a bug where this was called, so don't call this.");
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the full Roman transliteration. You don't get font
|
||||||
|
size information this way, of course. */
|
||||||
|
public String getString() {
|
||||||
|
StringBuffer sb = new StringBuffer();
|
||||||
|
for (int i = 0; i < length(); i++) {
|
||||||
|
sb.append(get(i).getTranslit());
|
||||||
|
}
|
||||||
|
return sb.toString();
|
||||||
|
}
|
||||||
|
}
|
61
source/org/thdl/tib/text/TranslitTuple.java
Normal file
61
source/org/thdl/tib/text/TranslitTuple.java
Normal file
|
@ -0,0 +1,61 @@
|
||||||
|
/*
|
||||||
|
The contents of this file are subject to the THDL Open Community License
|
||||||
|
Version 1.0 (the "License"); you may not use this file except in compliance
|
||||||
|
with the License. You may obtain a copy of the License on the THDL web site
|
||||||
|
(http://www.thdl.org/).
|
||||||
|
|
||||||
|
Software distributed under the License is distributed on an "AS IS" basis,
|
||||||
|
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
|
||||||
|
License for the specific terms governing rights and limitations under the
|
||||||
|
License.
|
||||||
|
|
||||||
|
The Initial Developer of this software is the Tibetan and Himalayan Digital
|
||||||
|
Library (THDL). Portions created by the THDL are Copyright 2004 THDL.
|
||||||
|
All Rights Reserved.
|
||||||
|
|
||||||
|
Contributor(s): ______________________________________.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.thdl.tib.text;
|
||||||
|
|
||||||
|
import java.util.Vector;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A stretch of Roman transliteration all in a certain font size.
|
||||||
|
*
|
||||||
|
* @author David Chandler */
|
||||||
|
class TranslitTuple {
    /** The transliteration accumulated so far for this stretch. */
    private final StringBuffer sb;
    /** The font size shared by every character of this stretch. */
    private final int sz;

    /** Creates a TranslitTuple holding the transliteration s, all of
        which is in font size sz. */
    public TranslitTuple(String s, int sz) {
        // FIXME: assert(s.length() > 0);
        this.sz = sz;
        this.sb = new StringBuffer(s);
    }

    /** If sz, the font size of s, equals this tuple's font size, s is
        appended to this tuple and this very tuple is returned.
        Otherwise this tuple is left untouched and a new tuple for s
        is returned. */
    public TranslitTuple getPossiblyCombinedTranslitTuple(String s, int sz) {
        if (this.sz != sz) {
            return new TranslitTuple(s, sz);
        }
        this.sb.append(s);
        return this;
    }

    /** Returns the stretch of Roman transliteration. */
    public String getTranslit() {
        return this.sb.toString();
    }

    /** Returns the font size of the Roman transliteration. */
    public int getFontSize() {
        return this.sz;
    }

    /** Do not call this -- it throws an error. */
    public String toString() {
        throw new Error("There was a bug where this was called, so don't call this.");
    }
}
|
Loading…
Reference in a new issue