A preliminary TMW->ACIP converter is here. There are known bugs, mostly with rare punctuation.

This commit is contained in:
dchandler 2003-09-02 06:39:33 +00:00
parent cc9ab06864
commit 316f59107b
9 changed files with 278 additions and 88 deletions

View file

@ -417,6 +417,8 @@ class ConvertDialog extends JDialog
} else { // conversion {to Wylie or TM} mode } else { // conversion {to Wylie or TM} mode
if (TMW_TO_WYLIE == ct) { if (TMW_TO_WYLIE == ct) {
newFileNamePrefix = suggested_WYLIE_prefix; newFileNamePrefix = suggested_WYLIE_prefix;
} else if (TMW_TO_ACIP == ct) {
newFileNamePrefix = suggested_ACIP_prefix;
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) { } else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
newFileNamePrefix = suggested_TO_UNI_prefix; newFileNamePrefix = suggested_TO_UNI_prefix;
} else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) { } else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {

View file

@ -78,6 +78,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
"Attention required", "Attention required",
JOptionPane.ERROR_MESSAGE); JOptionPane.ERROR_MESSAGE);
return false; return false;
} else if (49 == returnCode) {
JOptionPane.showMessageDialog(cd,
"Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP:\n Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct ACIP transliteration.",
"Attention required",
JOptionPane.ERROR_MESSAGE);
return false;
} else if (43 == returnCode) { } else if (43 == returnCode) {
JOptionPane.showMessageDialog(cd, JOptionPane.showMessageDialog(cd,
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?", "Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",

View file

@ -31,6 +31,7 @@ interface FontConverterConstants
final String TM_TO_TMW = "TM to TMW"; final String TM_TO_TMW = "TM to TMW";
final String TMW_TO_UNI = "TMW to Unicode"; final String TMW_TO_UNI = "TMW to Unicode";
final String TMW_TO_WYLIE = "TMW to Wylie"; final String TMW_TO_WYLIE = "TMW to Wylie";
final String TMW_TO_ACIP = "TMW to ACIP";
final String TMW_TO_TM = "TMW to TM"; final String TMW_TO_TM = "TMW to TM";
final String FIND_SOME_NON_TMW = "Find some non-TMW"; final String FIND_SOME_NON_TMW = "Find some non-TMW";
final String FIND_SOME_NON_TM = "Find some non-TM"; final String FIND_SOME_NON_TM = "Find some non-TM";
@ -43,6 +44,7 @@ interface FontConverterConstants
TM_TO_TMW, TM_TO_TMW,
TMW_TO_UNI, TMW_TO_UNI,
TMW_TO_WYLIE, TMW_TO_WYLIE,
TMW_TO_ACIP,
TMW_TO_TM, TMW_TO_TM,
FIND_SOME_NON_TMW, FIND_SOME_NON_TMW,
FIND_SOME_NON_TM, FIND_SOME_NON_TM,
@ -51,6 +53,7 @@ interface FontConverterConstants
}; };
final String suggested_WYLIE_prefix = "THDL_Wylie_"; final String suggested_WYLIE_prefix = "THDL_Wylie_";
final String suggested_ACIP_prefix = "ACIP_";
final String suggested_TO_TMW_prefix = "TMW_"; final String suggested_TO_TMW_prefix = "TMW_";
final String suggested_TO_UNI_prefix = "Uni_"; final String suggested_TO_UNI_prefix = "Uni_";
final String suggested_TO_TM_prefix = "TM_"; final String suggested_TO_TM_prefix = "TM_";

View file

@ -74,6 +74,7 @@ public class TibetanConverter implements FontConverterConstants {
boolean convertACIPToTMWMode = false; boolean convertACIPToTMWMode = false;
boolean convertToTMWMode = false; boolean convertToTMWMode = false;
boolean convertToWylieMode = false; boolean convertToWylieMode = false;
boolean convertToACIPMode = false;
boolean findSomeNonTMWMode = false; boolean findSomeNonTMWMode = false;
boolean findAllNonTMWMode = false; boolean findAllNonTMWMode = false;
boolean findSomeNonTMMode = false; boolean findSomeNonTMMode = false;
@ -98,6 +99,8 @@ public class TibetanConverter implements FontConverterConstants {
= args[0].equals("--to-unicode")) = args[0].equals("--to-unicode"))
|| (convertToWylieMode || (convertToWylieMode
= args[0].equals("--to-wylie")) = args[0].equals("--to-wylie"))
|| (convertToACIPMode
= args[0].equals("--to-acip"))
|| (findSomeNonTMWMode || (findSomeNonTMWMode
= args[0].equals("--find-some-non-tmw")) = args[0].equals("--find-some-non-tmw"))
|| (findSomeNonTMMode || (findSomeNonTMMode
@ -107,7 +110,7 @@ public class TibetanConverter implements FontConverterConstants {
))) { ))) {
out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw"); out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
out.println(" | --to-unicode | --to-wylie] RTF_file"); out.println(" | --to-unicode | --to-wylie | --to-acip] RTF_file");
out.println(" | TibetanConverter --acip-to-unicode TXT_file"); out.println(" | TibetanConverter --acip-to-unicode TXT_file");
out.println(" | TibetanConverter [--version | -v | --help | -h]"); out.println(" | TibetanConverter [--version | -v | --help | -h]");
out.println(""); out.println("");
@ -120,6 +123,7 @@ public class TibetanConverter implements FontConverterConstants {
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode"); out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb"); out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie"); out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
out.println(" --to-acip to convert TibetanMachineWeb to ACIP");
out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file"); out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
out.println(" --find-all-non-tmw to locate all characters in the input document that are"); out.println(" --find-all-non-tmw to locate all characters in the input document that are");
out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found"); out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found");
@ -177,6 +181,8 @@ public class TibetanConverter implements FontConverterConstants {
} else { // conversion {to Wylie or TM} mode } else { // conversion {to Wylie or TM} mode
if (convertToWylieMode) { if (convertToWylieMode) {
conversionTag = TMW_TO_WYLIE; conversionTag = TMW_TO_WYLIE;
} else if (convertToACIPMode) {
conversionTag = TMW_TO_ACIP;
} else if (convertToUnicodeMode) { } else if (convertToUnicodeMode) {
conversionTag = TMW_TO_UNI; conversionTag = TMW_TO_UNI;
} else if (convertToTMWMode) { } else if (convertToTMWMode) {
@ -311,6 +317,7 @@ public class TibetanConverter implements FontConverterConstants {
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0) ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
+ ((TMW_TO_UNI == ct) ? 1 : 0) + ((TMW_TO_UNI == ct) ? 1 : 0)
+ ((TM_TO_TMW == ct) ? 1 : 0) + ((TM_TO_TMW == ct) ? 1 : 0)
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
+ ((TMW_TO_WYLIE == ct) ? 1 : 0) + ((TMW_TO_WYLIE == ct) ? 1 : 0)
== 1); == 1);
long numAttemptedReplacements[] = new long[] { 0 }; long numAttemptedReplacements[] = new long[] { 0 };
@ -321,6 +328,13 @@ public class TibetanConverter implements FontConverterConstants {
numAttemptedReplacements)) { numAttemptedReplacements)) {
exitCode = 44; exitCode = 44;
} }
} else if (TMW_TO_ACIP == ct) {
// Convert to ACIP:
if (!tdoc.toACIP(0,
tdoc.getLength(),
numAttemptedReplacements)) {
exitCode = 49;
}
} else if (TMW_TO_UNI == ct) { } else if (TMW_TO_UNI == ct) {
StringBuffer errors = new StringBuffer(); StringBuffer errors = new StringBuffer();
// Convert to Unicode: // Convert to Unicode:

View file

@ -44,8 +44,13 @@ public class Manipulate
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
} }
/** Returns null on error. */
public static String wylieToAcip(String palabra) public static String wylieToAcip(String palabra)
{ {
// DLC FIXME: for unknown things, return null.
if (palabra.equals("@#")) return "*";
if (palabra.startsWith("@") || palabra.startsWith("#"))
return null; // we can't convert this in isolation! We need context.
char []caract; char []caract;
int i, j, len; int i, j, len;
String nuevaPalabra; String nuevaPalabra;
@ -83,6 +88,12 @@ public class Manipulate
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
nuevaPalabra = replace(nuevaPalabra, "/", ",");
nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = fixWazur(nuevaPalabra); nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra; return nuevaPalabra;
} }

View file

@ -86,6 +86,30 @@ public class TGCPair {
b.append(vowelWylie); b.append(vowelWylie);
return b.toString(); return b.toString();
} }
/**
 * Returns the ACIP transliteration of this grapheme cluster, built
 * by converting the consonant part and then the vowel part from
 * Wylie to ACIP with Manipulate.wylieToAcip.  Hyphens in the
 * converted consonant part are dropped; the converted vowel part is
 * appended unchanged.
 * @return the ACIP for this cluster; the empty string if both the
 * consonant and the vowel are null
 * @throws Error if Manipulate.wylieToAcip returns null for either
 * part, i.e. the part cannot be converted in isolation */
public String getACIP() {
    // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
    StringBuffer b = new StringBuffer();
    if (consonantWylie != null) {
        String consonantACIP // DLC FIXME can KAsh occur?
            = org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
        if (null == consonantACIP) {
            // Name the offending input so the failure can be diagnosed.
            throw new Error("Cannot convert the consonant Wylie {"
                            + consonantWylie + "} to ACIP");
        }
        // we may have {P-Y}, but the user wants to see {PY}.
        for (int i = 0; i < consonantACIP.length(); i++) {
            char ch = consonantACIP.charAt(i);
            if ('-' != ch)
                b.append(ch);
        }
    }
    if (vowelWylie != null) {
        String vowelACIP // DLC FIXME look for exceptions
            = org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
        if (null == vowelACIP) {
            // Name the offending input so the failure can be diagnosed.
            throw new Error("Cannot convert the vowel Wylie {"
                            + vowelWylie + "} to ACIP");
        }
        b.append(vowelACIP);
    }
    return b.toString();
}
public int classification; public int classification;
/** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant /** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
* consonantWylie and vowel vowelWylie. Use * consonantWylie and vowel vowelWylie. Use

View file

@ -786,37 +786,50 @@ public class TibTextUtils implements THDLWylieConstants {
private static final boolean makeIllegalTibetanGoEndToEnd = true; private static final boolean makeIllegalTibetanGoEndToEnd = true;
/** Returns "a", unless wylie is already "a". */ /** Returns "a"/"A", unless wylie (which really is EWTS, not ACIP)
private static String aVowelToUseAfter(String wylie) { is already "a". */
private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
if (wylie.equals(ACHEN)) if (wylie.equals(ACHEN))
return ""; return "";
else else
return WYLIE_aVOWEL; return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
} }
private static String unambiguousPostAVowelWylie(String wylie1, private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP,
String wylie2) { String wylie1,
String wylie2,
String acip1,
String acip2) {
String disambiguator = ""; String disambiguator = "";
// type "lard" vs. "lar.d", and you'll see the need for this // type "lard" vs. "lar.d", and you'll see the need for this
// disambiguation of suffix and postsuffix. sa doesn't take // disambiguation of suffix and postsuffix. sa doesn't take
// any head letters, so only da needs to be considered. // any head letters, so only da needs to be considered.
if (TibetanMachineWeb.isWylieTop(wylie1) if (TibetanMachineWeb.isWylieTop(wylie1)
&& wylie2.equals(/* FIXME: hard-coded */ "d")) && wylie2.equals(/* FIXME: hard-coded */ "d"))
disambiguator = WYLIE_DISAMBIGUATING_KEY_STRING; disambiguator = (EWTSNotACIP) ? WYLIE_DISAMBIGUATING_KEY_STRING : "-";
return wylie1 + disambiguator + wylie2; if (EWTSNotACIP)
return wylie1 + disambiguator + wylie2;
else
return acip1 + disambiguator + acip2;
} }
/** /**
* Gets the Extended Wylie for a sequence of glyphs. * Gets the Extended Wylie for the given sequence of glyphs if
* EWTSNotACIP is true, or the ACIP otherwise.
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
* you want ACIP
* @param dcs an array of glyphs * @param dcs an array of glyphs
* @param noSuchWylie an array which will not be touched if this is * @param noSuch an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie * successful; however, if there is no THDL Extended Wylie/ACIP
* corresponding to these glyphs, then noSuchWylie[0] will be set to * corresponding to these glyphs, then noSuch[0] will be set to true
* true * @return the Extended Wylie/ACIP corresponding to these glyphs, or
* @return the Extended Wylie corresponding to these glyphs, or null */ * null */
public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) { public static String getTranslit(boolean EWTSNotACIP,
DuffCode[] dcs,
boolean noSuch[]) {
StringBuffer warnings = (debug ? new StringBuffer() : null); StringBuffer warnings = (debug ? new StringBuffer() : null);
String ans = getWylieImplementation(dcs, noSuchWylie, warnings); String ans
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
if (debug && warnings.length() > 0) if (debug && warnings.length() > 0)
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
return ans; return ans;
@ -1172,13 +1185,13 @@ public class TibTextUtils implements THDLWylieConstants {
return candidateType; return candidateType;
} }
/** Appends to wylieBuffer the wylie for the glyph list glyphList /** Appends to translitBuffer the EWTS/ACIP for the glyph list
(which should be an ArrayList for speed). This will be very glyphList (which should be an ArrayList for speed). This will
user-friendly for "legal tsheg bars" and will be valid, but be very user-friendly for "legal tsheg bars" and will be
possibly ugly (interspersed with disambiguators or extra valid, but possibly ugly (interspersed with disambiguators or
vowels, etc.) Wylie for other things, such as Sanskrit extra vowels, etc.) Wylie/ACIP for other things, such as
transliteration. Updates warnings and noSuchWylie like the Sanskrit transliteration. Updates warnings and noSuch like
caller does. the caller does.
<p>What constitutes a legal, non-punctuation, non-whitespace <p>What constitutes a legal, non-punctuation, non-whitespace
tsheg bar? The following are the only such:</p> tsheg bar? The following are the only such:</p>
@ -1219,22 +1232,23 @@ public class TibTextUtils implements THDLWylieConstants {
<p>When there are three unadorned consonant stacks in a <p>When there are three unadorned consonant stacks in a
tyllable, a hard-coded list of valid Tibetan tsheg bars is tyllable, a hard-coded list of valid Tibetan tsheg bars is
relied upon to determine if the 'a' vowel comes after the relied upon to determine if the 'a'/'A' vowel comes after
first or the second consonant.</p> */ the first or the second consonant.</p> */
private static void getTshegBarWylie(java.util.List glyphList, private static void getTshegBarTranslit(boolean EWTSNotACIP,
boolean noSuchWylie[], java.util.List glyphList,
StringBuffer warnings, boolean noSuch[],
StringBuffer wylieBuffer) { StringBuffer warnings,
StringBuffer translitBuffer) {
TGCList gcs TGCList gcs
= breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie); = breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
String candidateType = getClassificationOfTshegBar(gcs, warnings, false); String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
int sz = gcs.size(); int sz = gcs.size();
if (candidateType == "invalid" if (candidateType == "invalid"
|| candidateType == "single-sanskrit-gc") { || candidateType == "single-sanskrit-gc") {
// Forget beauty and succintness -- just be sure to // Forget beauty and succintness -- just be sure to
// generate Wylie that can be converted unambiguously into // generate transliteration that can be converted
// Tibetan. Use a disambiguator or vowel after each // unambiguously into Tibetan. Use a disambiguator or
// grapheme cluster. // vowel after each grapheme cluster.
// //
// If we truly didn't care about beauty, we'd just lump // If we truly didn't care about beauty, we'd just lump
// SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into // SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into
@ -1244,19 +1258,20 @@ public class TibTextUtils implements THDLWylieConstants {
TGCPair tp = (TGCPair)gcs.get(i); TGCPair tp = (TGCPair)gcs.get(i);
int cls = tp.classification; int cls = tp.classification;
String wylie = tp.getWylie(); String wylie = tp.getWylie();
wylieBuffer.append(wylie); String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
translitBuffer.append(translit);
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
wylieBuffer.append(aVowelToUseAfter(wylie)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
} else { } else {
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
&& TGCPair.SANSKRIT_WITH_VOWEL != cls) && TGCPair.SANSKRIT_WITH_VOWEL != cls)
wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY); translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
} }
} }
} else { } else {
// Generate perfect, beautiful, Wylie, using the minimum // Generate perfect, beautiful transliteration, using the
// number of vowels and disambiguators. // minimum number of vowels and disambiguators.
int leftover = sz + 1; int leftover = sz + 1;
@ -1299,23 +1314,44 @@ public class TibTextUtils implements THDLWylieConstants {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie(); String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s"))) if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
|| (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m"))) || (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
|| (wylie1.equals("b") && wylie2.equals("d")) || (wylie1.equals("b") && wylie2.equals("d"))
|| (wylie1.equals("m") && wylie2.equals("d")) || (wylie1.equals("m") && wylie2.equals("d"))
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) { || (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); if (EWTSNotACIP)
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
else
translitBuffer.append(acip1 + '-' + acip2);
else else
wylieBuffer.append(wylie1 + wylie2); if (EWTSNotACIP)
translitBuffer.append(wylie1 + wylie2);
else
translitBuffer.append(acip1 + acip2);
wylieBuffer.append(aVowelToUseAfter(wylie2) translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
+ wylie3); + (EWTSNotACIP ? wylie3 : acip3));
} else { } else {
wylieBuffer.append(wylie1 if (EWTSNotACIP)
+ aVowelToUseAfter(wylie1) translitBuffer.append(wylie1
+ unambiguousPostAVowelWylie(wylie2, + aVowelToUseAfter(EWTSNotACIP, wylie1)
wylie3)); + unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie2,
wylie3,
acip2,
acip3));
else
translitBuffer.append(acip1
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
+ unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie2,
wylie3,
acip2,
acip3));
} }
} else if ("root" == candidateType } else if ("root" == candidateType
|| "prefix/root-root/suffix" == candidateType || "prefix/root-root/suffix" == candidateType
@ -1323,13 +1359,14 @@ public class TibTextUtils implements THDLWylieConstants {
|| "root-suffix-postsuffix" == candidateType || "root-suffix-postsuffix" == candidateType
|| "root-suffix" == candidateType) { || "root-suffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
leftover = 1; leftover = 1;
wylieBuffer.append(wylie1); translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
if (((TGCPair)gcs.get(0)).classification if (((TGCPair)gcs.get(0)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) { != TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(0)).classification); == ((TGCPair)gcs.get(0)).classification);
wylieBuffer.append(aVowelToUseAfter(wylie1)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
if (debug) System.out.println("DEBUG: appending vowel"); if (debug) System.out.println("DEBUG: appending vowel");
} else { } else {
if (debug) System.out.println("DEBUG: already has vowel 2"); if (debug) System.out.println("DEBUG: already has vowel 2");
@ -1338,26 +1375,39 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 3; leftover = 3;
String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie(); String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
wylieBuffer.append(unambiguousPostAVowelWylie(wylie2, String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
wylie3)); String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie2,
wylie3,
acip2,
acip3));
} }
} else if ("prefix-root-suffix" == candidateType } else if ("prefix-root-suffix" == candidateType
|| "prefix-root" == candidateType || "prefix-root" == candidateType
|| "prefix-root-suffix-postsuffix" == candidateType) { || "prefix-root-suffix-postsuffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
leftover = 2; leftover = 2;
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); if (EWTSNotACIP)
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
else
translitBuffer.append(acip1 + '-' + acip2);
else else
wylieBuffer.append(wylie1 + wylie2); if (EWTSNotACIP)
translitBuffer.append(wylie1 + wylie2);
else
translitBuffer.append(acip1 + acip2);
if (((TGCPair)gcs.get(1)).classification if (((TGCPair)gcs.get(1)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) { != TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(1)).classification); == ((TGCPair)gcs.get(1)).classification);
if (debug) System.out.println("DEBUG: appending vowel"); if (debug) System.out.println("DEBUG: appending vowel");
wylieBuffer.append(aVowelToUseAfter(wylie2)); translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
} else { } else {
if (debug) System.out.println("DEBUG: already has vowel 1"); if (debug) System.out.println("DEBUG: already has vowel 1");
} }
@ -1365,8 +1415,13 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 4; leftover = 4;
String wylie3 = ((TGCPair)gcs.get(2)).getWylie(); String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
String wylie4 = ((TGCPair)gcs.get(3)).getWylie(); String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
wylieBuffer.append(unambiguousPostAVowelWylie(wylie3, String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
wylie4)); String acip4 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(3)).getACIP();
translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
wylie3,
wylie4,
acip3,
acip4));
} }
} else if ("number" == candidateType) { } else if ("number" == candidateType) {
leftover = 0; leftover = 0;
@ -1374,18 +1429,17 @@ public class TibTextUtils implements THDLWylieConstants {
throw new Error("missed a case down here"); throw new Error("missed a case down here");
} }
// append the wylie left over: // append the wylie/ACIP left over:
for (int i = leftover; i < sz; i++) { for (int i = leftover; i < sz; i++) {
TGCPair tp = (TGCPair)gcs.get(i); TGCPair tp = (TGCPair)gcs.get(i);
String wylie = tp.getWylie(); translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
wylieBuffer.append(wylie);
} }
} }
} }
/** /**
* Gets the Extended Wylie for a sequence of glyphs. This works as * Gets the Extended Wylie/ACIP for a sequence of glyphs. This works
* follows: * as follows:
* *
* <p>We run along until we hit whitespace or punctuation. We take * <p>We run along until we hit whitespace or punctuation. We take
* everything before that and we see if it's a legal Tibetan tsheg bar, * everything before that and we see if it's a legal Tibetan tsheg bar,
@ -1393,22 +1447,25 @@ public class TibTextUtils implements THDLWylieConstants {
* vowel in the correct place. If not, then we throw a disambiguating * vowel in the correct place. If not, then we throw a disambiguating
* key or a vowel after each stack. * key or a vowel after each stack.
* *
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
* you want ACIP
* @param dcs an array of glyphs * @param dcs an array of glyphs
* @param noSuchWylie an array which will not be touched if this is * @param noSuch an array which will not be touched if this is
* successful; however, if there is no THDL Extended Wylie * successful; however, if there is no THDL Extended Wylie/ACIP
* corresponding to these glyphs, then noSuchWylie[0] will be set to * corresponding to these glyphs, then noSuch[0] will be set to true
* true
* @param warnings either null or a buffer to which will be appended * @param warnings either null or a buffer to which will be appended
* warnings about illegal tsheg bars * warnings about illegal tsheg bars
* @return the Extended Wylie corresponding to these glyphs, or null */ * @return the Extended Wylie/ACIP corresponding to these glyphs, or
public static String getWylieImplementation(DuffCode[] dcs, * null */
boolean noSuchWylie[], private static String getTranslitImplementation(boolean EWTSNotACIP,
StringBuffer warnings) { DuffCode[] dcs,
boolean noSuch[],
StringBuffer warnings) {
if (dcs.length == 0) if (dcs.length == 0)
return null; return null;
ArrayList glyphList = new ArrayList(); ArrayList glyphList = new ArrayList();
StringBuffer wylieBuffer = new StringBuffer(); StringBuffer translitBuffer = new StringBuffer();
for (int i=0; i<dcs.length; i++) { for (int i=0; i<dcs.length; i++) {
char ch = dcs[i].getCharacter(); char ch = dcs[i].getCharacter();
@ -1417,41 +1474,43 @@ public class TibTextUtils implements THDLWylieConstants {
if (k < 32) { if (k < 32) {
if (!glyphList.isEmpty()) { if (!glyphList.isEmpty()) {
getTshegBarWylie(glyphList, noSuchWylie, getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, wylieBuffer); warnings, translitBuffer);
glyphList.clear(); glyphList.clear();
if (null != warnings) if (null != warnings)
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first."); warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
} }
wylieBuffer.append(ch); translitBuffer.append(ch);
} else { } else {
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuchWylie); String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
String acip = EWTSNotACIP ? null : TibetanMachineWeb.getACIPForGlyph(dcs[i], noSuch);
if (TibetanMachineWeb.isWyliePunc(wylie) if (TibetanMachineWeb.isWyliePunc(wylie)
&& !TibetanMachineWeb.isWylieAdornment(wylie)) { && !TibetanMachineWeb.isWylieAdornment(wylie)) {
if (!glyphList.isEmpty()) { if (!glyphList.isEmpty()) {
getTshegBarWylie(glyphList, noSuchWylie, getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, wylieBuffer); warnings, translitBuffer);
glyphList.clear(); glyphList.clear();
} }
wylieBuffer.append(wylie); //append the punctuation translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation
} else { } else {
glyphList.add(dcs[i]); glyphList.add(dcs[i]);
} }
} }
} }
// replace remaining TMW with Wylie // replace remaining TMW with transliteration
if (!glyphList.isEmpty()) { if (!glyphList.isEmpty()) {
getTshegBarWylie(glyphList, noSuchWylie, warnings, wylieBuffer); getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
warnings, translitBuffer);
// glyphList.clear() if we weren't about to exit... // glyphList.clear() if we weren't about to exit...
if (null != warnings) if (null != warnings)
warnings.append("The stretch of Tibetan ended without final punctuation."); warnings.append("The stretch of Tibetan ended without final punctuation.");
} }
if (wylieBuffer.length() > 0) if (translitBuffer.length() > 0)
return wylieBuffer.toString(); return translitBuffer.toString();
else else
return null; return null;
} }

View file

@ -294,6 +294,18 @@ public class TibetanDocument extends DefaultStyledDocument {
return getWylie(0, getLength(), noSuchWylie); return getWylie(0, getLength(), noSuchWylie);
} }
/**
 * Returns the ACIP transliteration of the whole document by
 * delegating to {@link #getACIP(int, int, boolean[])} with the range
 * zero through getLength().  If the document mixes Tibetan and
 * non-Tibetan fonts, the conversion stops at the first non-Tibetan
 * font.
 * @param noSuchACIP an array that is left untouched on success; if
 * some glyph has no corresponding ACIP, noSuchACIP[0] is set to true
 * @return the string of ACIP corresponding to this document */
public String getACIP(boolean noSuchACIP[]) {
    final int documentStart = 0;
    final int documentEnd = getLength();
    return getACIP(documentStart, documentEnd, noSuchACIP);
}
/** /**
* Converts a portion of the document into Extended Wylie. * Converts a portion of the document into Extended Wylie.
* If the document consists of both Tibetan and * If the document consists of both Tibetan and
@ -306,7 +318,25 @@ public class TibetanDocument extends DefaultStyledDocument {
* corresponding to one of these glyphs, then noSuchWylie[0] will be * corresponding to one of these glyphs, then noSuchWylie[0] will be
* set to true * set to true
* @return the string of Wylie corresponding to this document */ * @return the string of Wylie corresponding to this document */
public String getWylie(int begin, int end, boolean noSuchWylie[]) { public String getWylie(int begin, int end, boolean noSuchWylie[]) {
return getTranslit(true, begin, end, noSuchWylie);
}
/**
 * Converts a portion of the document into ACIP.  If the document
 * consists of both Tibetan and non-Tibetan fonts, however, the
 * conversion stops at the first non-Tibetan font.
 * @param begin the beginning of the region to convert
 * @param end the end of the region to convert
 * @param noSuchACIP an array which will not be touched if this is
 * successful; however, if there is no ACIP corresponding to one of
 * these glyphs, then noSuchACIP[0] will be set to true
 * @return the string of ACIP corresponding to this region of the
 * document */
public String getACIP(int begin, int end, boolean noSuchACIP[]) {
    // The first argument is EWTSNotACIP: false selects ACIP.  Passing
    // true here would make this method return THDL Extended Wylie,
    // which is what getWylie(int, int, boolean[]) requests.
    return getTranslit(false, begin, end, noSuchACIP);
}
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
AttributeSet attr; AttributeSet attr;
String fontName; String fontName;
int fontNum; int fontNum;
@ -318,7 +348,7 @@ public class TibetanDocument extends DefaultStyledDocument {
java.util.List dcs = new ArrayList(); java.util.List dcs = new ArrayList();
int i = begin; int i = begin;
StringBuffer wylieBuffer = new StringBuffer(); StringBuffer translitBuffer = new StringBuffer();
try { try {
while (i < end) { while (i < end) {
@ -332,10 +362,10 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) { if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0]; DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array); dc_array = (DuffCode[])dcs.toArray(dc_array);
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear(); dcs.clear();
} }
wylieBuffer.append(ch); translitBuffer.append(ch);
} }
//current character isn't TMW //current character isn't TMW
@ -343,7 +373,7 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) { if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0]; DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array); dc_array = (DuffCode[])dcs.toArray(dc_array);
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear(); dcs.clear();
} }
} }
@ -358,9 +388,9 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) { if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0]; DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array); dc_array = (DuffCode[])dcs.toArray(dc_array);
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
} }
return wylieBuffer.toString(); return translitBuffer.toString();
} }
catch (BadLocationException ble) { catch (BadLocationException ble) {
ble.printStackTrace(); ble.printStackTrace();
@ -1101,6 +1131,17 @@ public class TibetanDocument extends DefaultStyledDocument {
* DuffCode..." text into the document */ * DuffCode..." text into the document */
public boolean toWylie(int start, int end, public boolean toWylie(int start, int end,
long numAttemptedReplacements[]) { long numAttemptedReplacements[]) {
return toTranslit(true, start, end, numAttemptedReplacements);
}
/**
 * ACIP counterpart of {@link #toWylie(int,int,long[])}: hands the
 * region [start, end) to the shared in-place transliteration routine
 * with the scheme flag set to ACIP rather than Extended Wylie.
 * @param start the beginning of the region to convert
 * @param end the end of the region to convert; when start is not
 * less than end, the shared routine returns true immediately
 * @param numAttemptedReplacements passed through unchanged to the
 * shared routine (NOTE(review): presumably an out-parameter counting
 * replacements, as for toWylie -- confirm)
 * @return whatever the shared routine returns (NOTE(review):
 * presumably false when error text had to be inserted into the
 * document, as for toWylie -- confirm) */
public boolean toACIP(int start, int end,
                      long numAttemptedReplacements[]) {
    // false selects ACIP; toWylie passes true for Extended Wylie.
    final boolean useEWTSNotACIP = false;
    return toTranslit(useEWTSNotACIP, start, end,
                      numAttemptedReplacements);
}
private boolean toTranslit(boolean EWTSNotACIP, int start, int end,
long numAttemptedReplacements[]) {
if (start >= end) if (start >= end)
return true; return true;
@ -1124,7 +1165,9 @@ public class TibetanDocument extends DefaultStyledDocument {
remove(start, i-start); remove(start, i-start);
ThdlDebug.verify(getRomanAttributeSet() != null); ThdlDebug.verify(getRomanAttributeSet() != null);
insertString(start, insertString(start,
TibTextUtils.getWylie(dc_array, noSuchWylie), TibTextUtils.getTranslit(EWTSNotACIP,
dc_array,
noSuchWylie),
getRomanAttributeSet()); getRomanAttributeSet());
dcs.clear(); dcs.clear();
} }

View file

@ -1706,6 +1706,13 @@ public static String wylieForGlyph(String hashKey) {
return sb.toString(); return sb.toString();
} }
/** Returns the result of running the given hash key through the
 *  scanner package's Wylie-to-ACIP conversion.
 *  @param hashKey the hash key of a glyph (NOTE(review): presumably
 *  the glyph's Extended Wylie, as for wylieForGlyph -- confirm)
 *  @return whatever Manipulate.wylieToAcip yields for hashKey */
private static String acipForGlyph(String hashKey) {
    // DLC FIXME: test this.
    return org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
}
/** Error that appears in a document when some TMW cannot be /** Error that appears in a document when some TMW cannot be
* transcribed in THDL Extended Wylie. This error message is * transcribed in THDL Extended Wylie. This error message is
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
@ -1716,6 +1723,16 @@ private static String getTMWToWylieErrorString(DuffCode dc) {
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; + " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
} }
/** Builds the error text that is placed in a document when a TMW
 *  glyph cannot be transcribed in ACIP.  The message is meant to be
 *  documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET),
 *  so change both when you change this.
 *  @param dc the glyph that could not be converted; its
 *  toString(true) form is embedded in the message
 *  @return the complete error message */
private static String getTMWToACIPErrorString(DuffCode dc) {
    final String prefix
        = "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode ";
    final String suffix
        = " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
    String glyphDescription = dc.toString(true);
    return prefix + glyphDescription + suffix;
}
/** /**
* Gets the Extended Wylie value for this glyph. * Gets the Extended Wylie value for this glyph.
* @param font the font of the TibetanMachineWeb * @param font the font of the TibetanMachineWeb
@ -1756,6 +1773,17 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
return wylieForGlyph(hashKey); return wylieForGlyph(hashKey);
} }
/** Gets the ACIP value for the given glyph.
 *  @param dc the glyph to transliterate
 *  @param noSuchACIP a one-element flag array; it is not touched
 *  when an ACIP value is found, and noSuchACIP[0] is set to true
 *  when the glyph has no hash key or no ACIP value
 *  @return the ACIP for dc, or, failing that, the error message
 *  produced by getTMWToACIPErrorString(dc) */
public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
    String key = getHashKeyForGlyph(dc);
    String acip = null;
    if (key != null) {
        acip = acipForGlyph(key);
    }
    if (acip != null) {
        return acip;
    }
    // Either the glyph has no hash key or the key has no ACIP
    // equivalent: flag the failure and hand back the error text.
    noSuchACIP[0] = true;
    return getTMWToACIPErrorString(dc);
}
/** This addresses bug 624133, "Input freezes after impossible /** This addresses bug 624133, "Input freezes after impossible
* character". Returns true iff s is a proper prefix of some * character". Returns true iff s is a proper prefix of some
* legal input for this keyboard. In the extended Wylie * legal input for this keyboard. In the extended Wylie