A preliminary TMW->ACIP converter is here. There are known bugs, mostly with rare punctuation.

This commit is contained in:
dchandler 2003-09-02 06:39:33 +00:00
parent cc9ab06864
commit 316f59107b
9 changed files with 278 additions and 88 deletions

View file

@ -417,6 +417,8 @@ class ConvertDialog extends JDialog
} else { // conversion {to Wylie or TM} mode
if (TMW_TO_WYLIE == ct) {
newFileNamePrefix = suggested_WYLIE_prefix;
} else if (TMW_TO_ACIP == ct) {
newFileNamePrefix = suggested_ACIP_prefix;
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
newFileNamePrefix = suggested_TO_UNI_prefix;
} else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {

View file

@ -78,6 +78,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
"Attention required",
JOptionPane.ERROR_MESSAGE);
return false;
} else if (49 == returnCode) {
JOptionPane.showMessageDialog(cd,
"Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP:\n Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct ACIP transliteration.",
"Attention required",
JOptionPane.ERROR_MESSAGE);
return false;
} else if (43 == returnCode) {
JOptionPane.showMessageDialog(cd,
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",

View file

@ -31,6 +31,7 @@ interface FontConverterConstants
final String TM_TO_TMW = "TM to TMW";
final String TMW_TO_UNI = "TMW to Unicode";
final String TMW_TO_WYLIE = "TMW to Wylie";
final String TMW_TO_ACIP = "TMW to ACIP";
final String TMW_TO_TM = "TMW to TM";
final String FIND_SOME_NON_TMW = "Find some non-TMW";
final String FIND_SOME_NON_TM = "Find some non-TM";
@ -43,6 +44,7 @@ interface FontConverterConstants
TM_TO_TMW,
TMW_TO_UNI,
TMW_TO_WYLIE,
TMW_TO_ACIP,
TMW_TO_TM,
FIND_SOME_NON_TMW,
FIND_SOME_NON_TM,
@ -51,6 +53,7 @@ interface FontConverterConstants
};
final String suggested_WYLIE_prefix = "THDL_Wylie_";
final String suggested_ACIP_prefix = "ACIP_";
final String suggested_TO_TMW_prefix = "TMW_";
final String suggested_TO_UNI_prefix = "Uni_";
final String suggested_TO_TM_prefix = "TM_";

View file

@ -74,6 +74,7 @@ public class TibetanConverter implements FontConverterConstants {
boolean convertACIPToTMWMode = false;
boolean convertToTMWMode = false;
boolean convertToWylieMode = false;
boolean convertToACIPMode = false;
boolean findSomeNonTMWMode = false;
boolean findAllNonTMWMode = false;
boolean findSomeNonTMMode = false;
@ -98,6 +99,8 @@ public class TibetanConverter implements FontConverterConstants {
= args[0].equals("--to-unicode"))
|| (convertToWylieMode
= args[0].equals("--to-wylie"))
|| (convertToACIPMode
= args[0].equals("--to-acip"))
|| (findSomeNonTMWMode
= args[0].equals("--find-some-non-tmw"))
|| (findSomeNonTMMode
@ -107,7 +110,7 @@ public class TibetanConverter implements FontConverterConstants {
))) {
out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
out.println(" | --to-unicode | --to-wylie] RTF_file");
out.println(" | --to-unicode | --to-wylie | --to-acip] RTF_file");
out.println(" | TibetanConverter --acip-to-unicode TXT_file");
out.println(" | TibetanConverter [--version | -v | --help | -h]");
out.println("");
@ -120,6 +123,7 @@ public class TibetanConverter implements FontConverterConstants {
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
out.println(" --to-acip to convert TibetanMachineWeb to ACIP");
out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
out.println(" --find-all-non-tmw to locate all characters in the input document that are");
out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found");
@ -177,6 +181,8 @@ public class TibetanConverter implements FontConverterConstants {
} else { // conversion {to Wylie or TM} mode
if (convertToWylieMode) {
conversionTag = TMW_TO_WYLIE;
} else if (convertToACIPMode) {
conversionTag = TMW_TO_ACIP;
} else if (convertToUnicodeMode) {
conversionTag = TMW_TO_UNI;
} else if (convertToTMWMode) {
@ -311,6 +317,7 @@ public class TibetanConverter implements FontConverterConstants {
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
+ ((TMW_TO_UNI == ct) ? 1 : 0)
+ ((TM_TO_TMW == ct) ? 1 : 0)
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
+ ((TMW_TO_WYLIE == ct) ? 1 : 0)
== 1);
long numAttemptedReplacements[] = new long[] { 0 };
@ -321,6 +328,13 @@ public class TibetanConverter implements FontConverterConstants {
numAttemptedReplacements)) {
exitCode = 44;
}
} else if (TMW_TO_ACIP == ct) {
// Convert to ACIP:
if (!tdoc.toACIP(0,
tdoc.getLength(),
numAttemptedReplacements)) {
exitCode = 49;
}
} else if (TMW_TO_UNI == ct) {
StringBuffer errors = new StringBuffer();
// Convert to Unicode:

View file

@ -44,8 +44,13 @@ public class Manipulate
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
}
/** Returns null on error. */
public static String wylieToAcip(String palabra)
{
// DLC FIXME: for unknown things, return null.
if (palabra.equals("@#")) return "*";
if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract;
int i, j, len;
String nuevaPalabra;
@ -83,6 +88,12 @@ public class Manipulate
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra;
}

View file

@ -86,6 +86,30 @@ public class TGCPair {
b.append(vowelWylie);
return b.toString();
}
/** Returns the ACIP transliteration of this grapheme cluster: the
 *  ACIP for the consonant part (with any '-' characters removed)
 *  followed by the ACIP for the vowel part.  Either part is skipped
 *  when its Wylie is null.
 *  @return the ACIP for this pair, possibly the empty string
 *  @throws Error if Manipulate.wylieToAcip returns null (its error
 *  indicator) for either part; the message names the offending
 *  Wylie so that the failure can be diagnosed */
public String getACIP() {
    // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
    StringBuffer b = new StringBuffer();
    if (consonantWylie != null) {
        String consonantACIP // DLC FIXME can KAsh occur?
            = org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
        if (null == consonantACIP)
            throw new Error("Cannot convert the consonant Wylie \""
                            + consonantWylie + "\" to ACIP.");
        // we may have {P-Y}, but the user wants to see {PY}.
        appendWithoutHyphens(b, consonantACIP);
    }
    if (vowelWylie != null) {
        String vowelACIP // DLC FIXME look for exceptions
            = org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
        if (null == vowelACIP)
            throw new Error("Cannot convert the vowel Wylie \""
                            + vowelWylie + "\" to ACIP.");
        b.append(vowelACIP);
    }
    return b.toString();
}

/** Appends to sink every character of text except '-'. */
private static void appendWithoutHyphens(StringBuffer sink, String text) {
    for (int i = 0; i < text.length(); i++) {
        char ch = text.charAt(i);
        if ('-' != ch)
            sink.append(ch);
    }
}
public int classification;
/** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
* consonantWylie and vowel vowelWylie. Use

View file

@ -786,37 +786,50 @@ public class TibTextUtils implements THDLWylieConstants {
private static final boolean makeIllegalTibetanGoEndToEnd = true;
/** Returns "a", unless wylie is already "a". */
private static String aVowelToUseAfter(String wylie) {
/** Returns "a"/"A", unless wylie (which really is EWTS, not ACIP)
is already "a". */
private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
if (wylie.equals(ACHEN))
return "";
else
return WYLIE_aVOWEL;
return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
}
private static String unambiguousPostAVowelWylie(String wylie1,
String wylie2) {
private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP,
String wylie1,
String wylie2,
String acip1,
String acip2) {
String disambiguator = "";
// type "lard" vs. "lar.d", and you'll see the need for this
// disambiguation of suffix and postsuffix. sa doesn't take
// any head letters, so only da needs to be considered.
if (TibetanMachineWeb.isWylieTop(wylie1)
&& wylie2.equals(/* FIXME: hard-coded */ "d"))
disambiguator = WYLIE_DISAMBIGUATING_KEY_STRING;
return wylie1 + disambiguator + wylie2;
disambiguator = (EWTSNotACIP) ? WYLIE_DISAMBIGUATING_KEY_STRING : "-";
if (EWTSNotACIP)
return wylie1 + disambiguator + wylie2;
else
return acip1 + disambiguator + acip2;
}
/**
* Gets the Extended Wylie for a sequence of glyphs.
* Gets the Extended Wylie for the given sequence of glyphs if
* EWTSNotACIP is true, or the ACIP otherwise.
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
* you want ACIP
* @param dcs an array of glyphs
* @param noSuchWylie an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie
* corresponding to these glyphs, then noSuchWylie[0] will be set to
* true
* @return the Extended Wylie corresponding to these glyphs, or null */
public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) {
* @param noSuch an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie/ACIP
* corresponding to these glyphs, then noSuch[0] will be set to true
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
* null */
public static String getTranslit(boolean EWTSNotACIP,
DuffCode[] dcs,
boolean noSuch[]) {
StringBuffer warnings = (debug ? new StringBuffer() : null);
String ans = getWylieImplementation(dcs, noSuchWylie, warnings);
String ans
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
if (debug && warnings.length() > 0)
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
return ans;
@ -1172,13 +1185,13 @@ public class TibTextUtils implements THDLWylieConstants {
return candidateType;
}
/** Appends to wylieBuffer the wylie for the glyph list glyphList
(which should be an ArrayList for speed). This will be very
user-friendly for "legal tsheg bars" and will be valid, but
possibly ugly (interspersed with disambiguators or extra
vowels, etc.) Wylie for other things, such as Sanskrit
transliteration. Updates warnings and noSuchWylie like the
caller does.
/** Appends to translitBuffer the EWTS/ACIP for the glyph list
glyphList (which should be an ArrayList for speed). This will
be very user-friendly for "legal tsheg bars" and will be
valid, but possibly ugly (interspersed with disambiguators or
extra vowels, etc.) Wylie/ACIP for other things, such as
Sanskrit transliteration. Updates warnings and noSuch like
the caller does.
<p>What constitutes a legal, non-punctuation, non-whitespace
tsheg bar? The following are the only such:</p>
@ -1219,22 +1232,23 @@ public class TibTextUtils implements THDLWylieConstants {
<p>When there are three unadorned consonant stacks in a
syllable, a hard-coded list of valid Tibetan tsheg bars is
relied upon to determine if the 'a' vowel comes after the
first or the second consonant.</p> */
private static void getTshegBarWylie(java.util.List glyphList,
boolean noSuchWylie[],
StringBuffer warnings,
StringBuffer wylieBuffer) {
relied upon to determine if the 'a'/'A' vowel comes after
the first or the second consonant.</p> */
private static void getTshegBarTranslit(boolean EWTSNotACIP,
java.util.List glyphList,
boolean noSuch[],
StringBuffer warnings,
StringBuffer translitBuffer) {
TGCList gcs
= breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie);
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
int sz = gcs.size();
if (candidateType == "invalid"
|| candidateType == "single-sanskrit-gc") {
// Forget beauty and succinctness -- just be sure to
// generate Wylie that can be converted unambiguously into
// Tibetan. Use a disambiguator or vowel after each
// grapheme cluster.
// generate transliteration that can be converted
// unambiguously into Tibetan. Use a disambiguator or
// vowel after each grapheme cluster.
//
// If we truly didn't care about beauty, we'd just lump
// SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into
@ -1244,19 +1258,20 @@ public class TibTextUtils implements THDLWylieConstants {
TGCPair tp = (TGCPair)gcs.get(i);
int cls = tp.classification;
String wylie = tp.getWylie();
wylieBuffer.append(wylie);
String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
translitBuffer.append(translit);
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
wylieBuffer.append(aVowelToUseAfter(wylie));
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
} else {
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY);
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
}
}
} else {
// Generate perfect, beautiful, Wylie, using the minimum
// number of vowels and disambiguators.
// Generate perfect, beautiful transliteration, using the
// minimum number of vowels and disambiguators.
int leftover = sz + 1;
@ -1299,23 +1314,44 @@ public class TibTextUtils implements THDLWylieConstants {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
|| (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
|| (wylie1.equals("b") && wylie2.equals("d"))
|| (wylie1.equals("m") && wylie2.equals("d"))
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
if (EWTSNotACIP)
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
else
translitBuffer.append(acip1 + '-' + acip2);
else
wylieBuffer.append(wylie1 + wylie2);
if (EWTSNotACIP)
translitBuffer.append(wylie1 + wylie2);
else
translitBuffer.append(acip1 + acip2);
wylieBuffer.append(aVowelToUseAfter(wylie2)
+ wylie3);
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
+ (EWTSNotACIP ? wylie3 : acip3));
} else {
wylieBuffer.append(wylie1
+ aVowelToUseAfter(wylie1)
+ unambiguousPostAVowelWylie(wylie2,
wylie3));
if (EWTSNotACIP)
translitBuffer.append(wylie1
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
+ unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie2,
wylie3,
acip2,
acip3));
else
translitBuffer.append(acip1
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
+ unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie2,
wylie3,
acip2,
acip3));
}
} else if ("root" == candidateType
|| "prefix/root-root/suffix" == candidateType
@ -1323,13 +1359,14 @@ public class TibTextUtils implements THDLWylieConstants {
|| "root-suffix-postsuffix" == candidateType
|| "root-suffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
leftover = 1;
wylieBuffer.append(wylie1);
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
if (((TGCPair)gcs.get(0)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(0)).classification);
wylieBuffer.append(aVowelToUseAfter(wylie1));
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
if (debug) System.out.println("DEBUG: appending vowel");
} else {
if (debug) System.out.println("DEBUG: already has vowel 2");
@ -1338,26 +1375,39 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 3;
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
wylieBuffer.append(unambiguousPostAVowelWylie(wylie2,
wylie3));
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie2,
wylie3,
acip2,
acip3));
}
} else if ("prefix-root-suffix" == candidateType
|| "prefix-root" == candidateType
|| "prefix-root-suffix-postsuffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
leftover = 2;
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
if (EWTSNotACIP)
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
else
translitBuffer.append(acip1 + '-' + acip2);
else
wylieBuffer.append(wylie1 + wylie2);
if (EWTSNotACIP)
translitBuffer.append(wylie1 + wylie2);
else
translitBuffer.append(acip1 + acip2);
if (((TGCPair)gcs.get(1)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(1)).classification);
if (debug) System.out.println("DEBUG: appending vowel");
wylieBuffer.append(aVowelToUseAfter(wylie2));
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
} else {
if (debug) System.out.println("DEBUG: already has vowel 1");
}
@ -1365,8 +1415,13 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 4;
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
wylieBuffer.append(unambiguousPostAVowelWylie(wylie3,
wylie4));
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
String acip4 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(3)).getACIP();
translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie3,
wylie4,
acip3,
acip4));
}
} else if ("number" == candidateType) {
leftover = 0;
@ -1374,18 +1429,17 @@ public class TibTextUtils implements THDLWylieConstants {
throw new Error("missed a case down here");
}
// append the wylie left over:
// append the wylie/ACIP left over:
for (int i = leftover; i < sz; i++) {
TGCPair tp = (TGCPair)gcs.get(i);
String wylie = tp.getWylie();
wylieBuffer.append(wylie);
translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
}
}
}
/**
* Gets the Extended Wylie for a sequence of glyphs. This works as
* follows:
* Gets the Extended Wylie/ACIP for a sequence of glyphs. This works
* as follows:
*
* <p>We run along until we hit whitespace or punctuation. We take
* everything before that and we see if it's a legal Tibetan tsheg bar,
@ -1393,22 +1447,25 @@ public class TibTextUtils implements THDLWylieConstants {
* vowel in the correct place. If not, then we throw a disambiguating
* key or a vowel after each stack.
*
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
* you want ACIP
* @param dcs an array of glyphs
* @param noSuchWylie an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie
* corresponding to these glyphs, then noSuchWylie[0] will be set to
* true
* @param noSuch an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie/ACIP
* corresponding to these glyphs, then noSuch[0] will be set to true
* @param warnings either null or a buffer to which will be appended
* warnings about illegal tsheg bars
* @return the Extended Wylie corresponding to these glyphs, or null */
public static String getWylieImplementation(DuffCode[] dcs,
boolean noSuchWylie[],
StringBuffer warnings) {
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
* null */
private static String getTranslitImplementation(boolean EWTSNotACIP,
DuffCode[] dcs,
boolean noSuch[],
StringBuffer warnings) {
if (dcs.length == 0)
return null;
ArrayList glyphList = new ArrayList();
StringBuffer wylieBuffer = new StringBuffer();
StringBuffer translitBuffer = new StringBuffer();
for (int i=0; i<dcs.length; i++) {
char ch = dcs[i].getCharacter();
@ -1417,41 +1474,43 @@ public class TibTextUtils implements THDLWylieConstants {
if (k < 32) {
if (!glyphList.isEmpty()) {
getTshegBarWylie(glyphList, noSuchWylie,
warnings, wylieBuffer);
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, translitBuffer);
glyphList.clear();
if (null != warnings)
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
}
wylieBuffer.append(ch);
translitBuffer.append(ch);
} else {
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuchWylie);
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
String acip = EWTSNotACIP ? null : TibetanMachineWeb.getACIPForGlyph(dcs[i], noSuch);
if (TibetanMachineWeb.isWyliePunc(wylie)
&& !TibetanMachineWeb.isWylieAdornment(wylie)) {
if (!glyphList.isEmpty()) {
getTshegBarWylie(glyphList, noSuchWylie,
warnings, wylieBuffer);
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, translitBuffer);
glyphList.clear();
}
wylieBuffer.append(wylie); //append the punctuation
translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation
} else {
glyphList.add(dcs[i]);
}
}
}
// replace remaining TMW with Wylie
// replace remaining TMW with transliteration
if (!glyphList.isEmpty()) {
getTshegBarWylie(glyphList, noSuchWylie, warnings, wylieBuffer);
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, translitBuffer);
// glyphList.clear() if we weren't about to exit...
if (null != warnings)
warnings.append("The stretch of Tibetan ended without final punctuation.");
}
if (wylieBuffer.length() > 0)
return wylieBuffer.toString();
if (translitBuffer.length() > 0)
return translitBuffer.toString();
else
return null;
}

View file

@ -294,6 +294,18 @@ public class TibetanDocument extends DefaultStyledDocument {
return getWylie(0, getLength(), noSuchWylie);
}
/**
 * Produces the ACIP transliteration of the whole document, i.e. of
 * the region running from offset 0 to the document's length.  If the
 * document consists of both Tibetan and non-Tibetan fonts, however,
 * the conversion stops at the first non-Tibetan font.
 * @param noSuchACIP an array which will not be touched if this is
 * successful; however, if there is no ACIP corresponding to one of
 * these glyphs, then noSuchACIP[0] will be set to true
 * @return the string of ACIP corresponding to this document */
public String getACIP(boolean noSuchACIP[]) {
    final int wholeDocumentEnd = getLength();
    return getACIP(0, wholeDocumentEnd, noSuchACIP);
}
/**
* Converts a portion of the document into Extended Wylie.
* If the document consists of both Tibetan and
@ -306,7 +318,25 @@ public class TibetanDocument extends DefaultStyledDocument {
* corresponding to one of these glyphs, then noSuchWylie[0] will be
* set to true
* @return the string of Wylie corresponding to this document */
public String getWylie(int begin, int end, boolean noSuchWylie[]) {
public String getWylie(int begin, int end, boolean noSuchWylie[]) {
return getTranslit(true, begin, end, noSuchWylie);
}
/**
 * Converts a portion of the document into ACIP.  If the document
 * consists of both Tibetan and non-Tibetan fonts, however, the
 * conversion stops at the first non-Tibetan font.
 * @param begin the beginning of the region to convert
 * @param end the end of the region to convert
 * @param noSuchACIP an array which will not be touched if this is
 * successful; however, if there is no ACIP corresponding to one of
 * these glyphs, then noSuchACIP[0] will be set to true
 * @return the string of ACIP corresponding to this document */
public String getACIP(int begin, int end, boolean noSuchACIP[]) {
    // The first argument is "EWTSNotACIP": it must be false here, or
    // we would hand back THDL Extended Wylie just as getWylie does.
    return getTranslit(false, begin, end, noSuchACIP);
}
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
AttributeSet attr;
String fontName;
int fontNum;
@ -318,7 +348,7 @@ public class TibetanDocument extends DefaultStyledDocument {
java.util.List dcs = new ArrayList();
int i = begin;
StringBuffer wylieBuffer = new StringBuffer();
StringBuffer translitBuffer = new StringBuffer();
try {
while (i < end) {
@ -332,10 +362,10 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear();
}
wylieBuffer.append(ch);
translitBuffer.append(ch);
}
//current character isn't TMW
@ -343,7 +373,7 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear();
}
}
@ -358,9 +388,9 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
}
return wylieBuffer.toString();
return translitBuffer.toString();
}
catch (BadLocationException ble) {
ble.printStackTrace();
@ -1101,6 +1131,17 @@ public class TibetanDocument extends DefaultStyledDocument {
* DuffCode..." text into the document */
public boolean toWylie(int start, int end,
                       long numAttemptedReplacements[]) {
    // Same machinery as toACIP; true asks toTranslit for THDL
    // Extended Wylie rather than ACIP.
    final boolean wantExtendedWylie = true;
    return toTranslit(wantExtendedWylie, start, end,
                      numAttemptedReplacements);
}
/** Works just like {@link #toWylie(int,int,long[])}, except that the
 *  transliteration requested from toTranslit is ACIP instead of THDL
 *  Extended Wylie.
 *  @param start the beginning of the region to convert
 *  @param end the end of the region to convert
 *  @param numAttemptedReplacements passed through to toTranslit
 *  unchanged (presumably a one-element counter, as for toWylie --
 *  TODO confirm)
 *  @return whatever toTranslit returns; NOTE(review): the command-line
 *  converter treats false as "the output contains ACIP error
 *  messages" -- confirm against toTranslit */
public boolean toACIP(int start, int end,
                      long numAttemptedReplacements[]) {
    final boolean wantExtendedWylie = false;
    return toTranslit(wantExtendedWylie, start, end,
                      numAttemptedReplacements);
}
private boolean toTranslit(boolean EWTSNotACIP, int start, int end,
long numAttemptedReplacements[]) {
if (start >= end)
return true;
@ -1124,7 +1165,9 @@ public class TibetanDocument extends DefaultStyledDocument {
remove(start, i-start);
ThdlDebug.verify(getRomanAttributeSet() != null);
insertString(start,
TibTextUtils.getWylie(dc_array, noSuchWylie),
TibTextUtils.getTranslit(EWTSNotACIP,
dc_array,
noSuchWylie),
getRomanAttributeSet());
dcs.clear();
}

View file

@ -1706,6 +1706,13 @@ public static String wylieForGlyph(String hashKey) {
return sb.toString();
}
/** Returns the ACIP for the glyph whose hash key is hashKey, obtained
 *  by running hashKey through Manipulate.wylieToAcip.  The result is
 *  null when that conversion reports an error. */
private static String acipForGlyph(String hashKey) {
    // DLC FIXME: test this.
    return org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
}
/** Error that appears in a document when some TMW cannot be
* transcribed in THDL Extended Wylie. This error message is
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
@ -1716,6 +1723,16 @@ private static String getTMWToWylieErrorString(DuffCode dc) {
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
}
/** Builds the error text that is placed in a document when some TMW
 *  glyph cannot be transcribed in ACIP.  This error message is
 *  documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET),
 *  so change them both when you change this.
 *  @param dc the glyph that could not be converted
 *  @return the error text, which embeds dc.toString(true) */
private static String getTMWToACIPErrorString(DuffCode dc) {
    StringBuffer message = new StringBuffer();
    message.append("<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode ");
    message.append(dc.toString(true));
    message.append(" to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>");
    return message.toString();
}
/**
* Gets the Extended Wylie value for this glyph.
* @param font the font of the TibetanMachineWeb
@ -1756,6 +1773,17 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
return wylieForGlyph(hashKey);
}
/** Gets the ACIP value for the glyph dc.
 *  @param dc the glyph to transliterate
 *  @param noSuchACIP an array which is not touched on success;
 *  noSuchACIP[0] is set to true when dc has no hash key or when no
 *  ACIP can be derived from that hash key
 *  @return the ACIP for dc on success, otherwise the "cannot convert
 *  DuffCode" error text for dc */
public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
    String hashKey = getHashKeyForGlyph(dc);
    if (hashKey != null) {
        String acip = acipForGlyph(hashKey);
        if (acip != null)
            return acip;
    }
    // Either there was no hash key or it had no ACIP equivalent.
    noSuchACIP[0] = true;
    return getTMWToACIPErrorString(dc);
}
/** This addresses bug 624133, "Input freezes after impossible
* character". Returns true iff s is a proper prefix of some
* legal input for this keyboard. In the extended Wylie