From 316f59107bd63684743ea2455a4395a1f78c4a83 Mon Sep 17 00:00:00 2001 From: dchandler Date: Tue, 2 Sep 2003 06:39:33 +0000 Subject: [PATCH] A preliminary TMW->ACIP converter is here. There are known bugs, mostly with rare punctuation. --- source/org/thdl/tib/input/ConvertDialog.java | 2 + source/org/thdl/tib/input/ConverterGUI.java | 6 + .../tib/input/FontConverterConstants.java | 3 + .../org/thdl/tib/input/TibetanConverter.java | 16 +- source/org/thdl/tib/scanner/Manipulate.java | 11 + source/org/thdl/tib/text/TGCPair.java | 24 ++ source/org/thdl/tib/text/TibTextUtils.java | 217 +++++++++++------- source/org/thdl/tib/text/TibetanDocument.java | 59 ++++- .../org/thdl/tib/text/TibetanMachineWeb.java | 28 +++ 9 files changed, 278 insertions(+), 88 deletions(-) diff --git a/source/org/thdl/tib/input/ConvertDialog.java b/source/org/thdl/tib/input/ConvertDialog.java index afbdafa..4bba4cd 100644 --- a/source/org/thdl/tib/input/ConvertDialog.java +++ b/source/org/thdl/tib/input/ConvertDialog.java @@ -417,6 +417,8 @@ class ConvertDialog extends JDialog } else { // conversion {to Wylie or TM} mode if (TMW_TO_WYLIE == ct) { newFileNamePrefix = suggested_WYLIE_prefix; + } else if (TMW_TO_ACIP == ct) { + newFileNamePrefix = suggested_ACIP_prefix; } else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) { newFileNamePrefix = suggested_TO_UNI_prefix; } else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) { diff --git a/source/org/thdl/tib/input/ConverterGUI.java b/source/org/thdl/tib/input/ConverterGUI.java index bd5e701..097ad37 100644 --- a/source/org/thdl/tib/input/ConverterGUI.java +++ b/source/org/thdl/tib/input/ConverterGUI.java @@ -78,6 +78,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants { "Attention required", JOptionPane.ERROR_MESSAGE); return false; + } else if (49 == returnCode) { + JOptionPane.showMessageDialog(cd, + "Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP:\n 
Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct ACIP transliteration.", + "Attention required", + JOptionPane.ERROR_MESSAGE); + return false; } else if (43 == returnCode) { JOptionPane.showMessageDialog(cd, "Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?", diff --git a/source/org/thdl/tib/input/FontConverterConstants.java b/source/org/thdl/tib/input/FontConverterConstants.java index fa25303..3d695f7 100644 --- a/source/org/thdl/tib/input/FontConverterConstants.java +++ b/source/org/thdl/tib/input/FontConverterConstants.java @@ -31,6 +31,7 @@ interface FontConverterConstants final String TM_TO_TMW = "TM to TMW"; final String TMW_TO_UNI = "TMW to Unicode"; final String TMW_TO_WYLIE = "TMW to Wylie"; + final String TMW_TO_ACIP = "TMW to ACIP"; final String TMW_TO_TM = "TMW to TM"; final String FIND_SOME_NON_TMW = "Find some non-TMW"; final String FIND_SOME_NON_TM = "Find some non-TM"; @@ -43,6 +44,7 @@ interface FontConverterConstants TM_TO_TMW, TMW_TO_UNI, TMW_TO_WYLIE, + TMW_TO_ACIP, TMW_TO_TM, FIND_SOME_NON_TMW, FIND_SOME_NON_TM, @@ -51,6 +53,7 @@ interface FontConverterConstants }; final String suggested_WYLIE_prefix = "THDL_Wylie_"; + final String suggested_ACIP_prefix = "ACIP_"; final String suggested_TO_TMW_prefix = "TMW_"; final String suggested_TO_UNI_prefix = "Uni_"; final String suggested_TO_TM_prefix = "TM_"; diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java index d62d292..9379179 100644 --- a/source/org/thdl/tib/input/TibetanConverter.java +++ b/source/org/thdl/tib/input/TibetanConverter.java @@ -74,6 +74,7 @@ public class TibetanConverter implements FontConverterConstants { boolean convertACIPToTMWMode = false; boolean convertToTMWMode = false; boolean convertToWylieMode = false; + boolean convertToACIPMode = false; 
boolean findSomeNonTMWMode = false; boolean findAllNonTMWMode = false; boolean findSomeNonTMMode = false; @@ -98,6 +99,8 @@ public class TibetanConverter implements FontConverterConstants { = args[0].equals("--to-unicode")) || (convertToWylieMode = args[0].equals("--to-wylie")) + || (convertToACIPMode + = args[0].equals("--to-acip")) || (findSomeNonTMWMode = args[0].equals("--find-some-non-tmw")) || (findSomeNonTMMode @@ -107,7 +110,7 @@ public class TibetanConverter implements FontConverterConstants { ))) { out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw"); out.println(" | --to-tibetan-machine | --to-tibetan-machine-web"); - out.println(" | --to-unicode | --to-wylie] RTF_file"); + out.println(" | --to-unicode | --to-wylie | --to-acip] RTF_file"); out.println(" | TibetanConverter --acip-to-unicode TXT_file"); out.println(" | TibetanConverter [--version | -v | --help | -h]"); out.println(""); @@ -120,6 +123,7 @@ public class TibetanConverter implements FontConverterConstants { out.println(" --to-unicode to convert TibetanMachineWeb to Unicode"); out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb"); out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie"); + out.println(" --to-acip to convert TibetanMachineWeb to ACIP"); out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file"); out.println(" --find-all-non-tmw to locate all characters in the input document that are"); out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found"); @@ -177,6 +181,8 @@ public class TibetanConverter implements FontConverterConstants { } else { // conversion {to Wylie or TM} mode if (convertToWylieMode) { conversionTag = TMW_TO_WYLIE; + } else if (convertToACIPMode) { + conversionTag = TMW_TO_ACIP; } else if (convertToUnicodeMode) { conversionTag = TMW_TO_UNI; } else if (convertToTMWMode) { @@ -311,6 +317,7 @@ public class TibetanConverter implements 
FontConverterConstants { ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0) + ((TMW_TO_UNI == ct) ? 1 : 0) + ((TM_TO_TMW == ct) ? 1 : 0) + + ((TMW_TO_ACIP == ct) ? 1 : 0) + ((TMW_TO_WYLIE == ct) ? 1 : 0) == 1); long numAttemptedReplacements[] = new long[] { 0 }; @@ -321,6 +328,13 @@ public class TibetanConverter implements FontConverterConstants { numAttemptedReplacements)) { exitCode = 44; } + } else if (TMW_TO_ACIP == ct) { + // Convert to ACIP: + if (!tdoc.toACIP(0, + tdoc.getLength(), + numAttemptedReplacements)) { + exitCode = 49; + } } else if (TMW_TO_UNI == ct) { StringBuffer errors = new StringBuffer(); // Convert to Unicode: diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java index 11a3fae..1d7571f 100644 --- a/source/org/thdl/tib/scanner/Manipulate.java +++ b/source/org/thdl/tib/scanner/Manipulate.java @@ -44,8 +44,13 @@ public class Manipulate return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u'; } + /** Returns null on error. */ public static String wylieToAcip(String palabra) { + // DLC FIXME: for unknown things, return null. + if (palabra.equals("@#")) return "*"; + if (palabra.startsWith("@") || palabra.startsWith("#")) + return null; // we can't convert this in isolation! We need context. 
char []caract; int i, j, len; String nuevaPalabra; @@ -83,6 +88,12 @@ public class Manipulate nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ"); nuevaPalabra = replace(nuevaPalabra, "TS", "TZ"); nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS"); + nuevaPalabra = replace(nuevaPalabra, "a", "'A"); + nuevaPalabra = replace(nuevaPalabra, "i", "'I"); + nuevaPalabra = replace(nuevaPalabra, "u", "'U"); + nuevaPalabra = replace(nuevaPalabra, "-I", "i"); + nuevaPalabra = replace(nuevaPalabra, "/", ","); + nuevaPalabra = replace(nuevaPalabra, "_", " "); nuevaPalabra = fixWazur(nuevaPalabra); return nuevaPalabra; } diff --git a/source/org/thdl/tib/text/TGCPair.java b/source/org/thdl/tib/text/TGCPair.java index fe3d305..8605de4 100644 --- a/source/org/thdl/tib/text/TGCPair.java +++ b/source/org/thdl/tib/text/TGCPair.java @@ -86,6 +86,30 @@ public class TGCPair { b.append(vowelWylie); return b.toString(); } + public String getACIP() { + // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie? + StringBuffer b = new StringBuffer(); + if (consonantWylie != null) { + String consonantACIP // DLC FIXME can KAsh occur? + = org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie); + if (null == consonantACIP) throw new Error("how?"); + // System.out.println("DLC: Wylie=" + consonantWylie + ", ACIP=" + consonantACIP); + // we may have {P-Y}, but the user wants to see {PY}. + for (int i = 0; i < consonantACIP.length(); i++) { + char ch = consonantACIP.charAt(i); + if ('-' != ch) + b.append(ch); + } + } + if (vowelWylie != null) { + String vowelACIP // DLC FIXME look for exceptions + = org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie); + // System.out.println("DLC: Wylie=" + vowelWylie + ", ACIP=" + vowelACIP); + if (null == vowelACIP) throw new Error("how?"); + b.append(vowelACIP); + } + return b.toString(); + } public int classification; /** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant * consonantWylie and vowel vowelWylie. 
Use diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java index a191c9c..a83df5e 100644 --- a/source/org/thdl/tib/text/TibTextUtils.java +++ b/source/org/thdl/tib/text/TibTextUtils.java @@ -786,37 +786,50 @@ public class TibTextUtils implements THDLWylieConstants { private static final boolean makeIllegalTibetanGoEndToEnd = true; - /** Returns "a", unless wylie is already "a". */ - private static String aVowelToUseAfter(String wylie) { + /** Returns "a"/"A", unless wylie (which really is EWTS, not ACIP) + is already "a". */ + private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) { if (wylie.equals(ACHEN)) return ""; else - return WYLIE_aVOWEL; + return (EWTSNotACIP) ? WYLIE_aVOWEL : "A"; } - private static String unambiguousPostAVowelWylie(String wylie1, - String wylie2) { + private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP, + String wylie1, + String wylie2, + String acip1, + String acip2) { String disambiguator = ""; // type "lard" vs. "lar.d", and you'll see the need for this // disambiguation of suffix and postsuffix. sa doesn't take // any head letters, so only da needs to be considered. if (TibetanMachineWeb.isWylieTop(wylie1) && wylie2.equals(/* FIXME: hard-coded */ "d")) - disambiguator = WYLIE_DISAMBIGUATING_KEY_STRING; - return wylie1 + disambiguator + wylie2; + disambiguator = (EWTSNotACIP) ? WYLIE_DISAMBIGUATING_KEY_STRING : "-"; + if (EWTSNotACIP) + return wylie1 + disambiguator + wylie2; + else + return acip1 + disambiguator + acip2; } /** -* Gets the Extended Wylie for a sequence of glyphs. +* Gets the Extended Wylie for the given sequence of glyphs if +* EWTSNotACIP is true, or the ACIP otherwise. 
+* @param EWTSNotACIP true if you want THDL Extended Wylie, false if +* you want ACIP * @param dcs an array of glyphs -* @param noSuchWylie an array which will not be touched if this is -* successful; however, if there is no THDL Extended Wylie -* corresponding to these glyphs, then noSuchWylie[0] will be set to -* true -* @return the Extended Wylie corresponding to these glyphs, or null */ - public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) { +* @param noSuch an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie/ACIP +* corresponding to these glyphs, then noSuch[0] will be set to true +* @return the Extended Wylie/ACIP corresponding to these glyphs, or +* null */ + public static String getTranslit(boolean EWTSNotACIP, + DuffCode[] dcs, + boolean noSuch[]) { StringBuffer warnings = (debug ? new StringBuffer() : null); - String ans = getWylieImplementation(dcs, noSuchWylie, warnings); + String ans + = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings); if (debug && warnings.length() > 0) System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings); return ans; @@ -1172,13 +1185,13 @@ public class TibTextUtils implements THDLWylieConstants { return candidateType; } - /** Appends to wylieBuffer the wylie for the glyph list glyphList - (which should be an ArrayList for speed). This will be very - user-friendly for "legal tsheg bars" and will be valid, but - possibly ugly (interspersed with disambiguators or extra - vowels, etc.) Wylie for other things, such as Sanskrit - transliteration. Updates warnings and noSuchWylie like the - caller does. + /** Appends to translitBuffer the EWTS/ACIP for the glyph list + glyphList (which should be an ArrayList for speed). This will + be very user-friendly for "legal tsheg bars" and will be + valid, but possibly ugly (interspersed with disambiguators or + extra vowels, etc.) Wylie/ACIP for other things, such as + Sanskrit transliteration. 
Updates warnings and noSuch like + the caller does.

What constitutes a legal, non-punctuation, non-whitespace tsheg bar? The following are the only such:

@@ -1219,22 +1232,23 @@ public class TibTextUtils implements THDLWylieConstants {

When there are three unadorned consonant stacks in a tyllable, a hard-coded list of valid Tibetan tsheg bars is - relied upon to determine if the 'a' vowel comes after the - first or the second consonant.

*/ - private static void getTshegBarWylie(java.util.List glyphList, - boolean noSuchWylie[], - StringBuffer warnings, - StringBuffer wylieBuffer) { + relied upon to determine if the 'a'/'A' vowel comes after + the first or the second consonant.

*/ + private static void getTshegBarTranslit(boolean EWTSNotACIP, + java.util.List glyphList, + boolean noSuch[], + StringBuffer warnings, + StringBuffer translitBuffer) { TGCList gcs - = breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie); + = breakTshegBarIntoGraphemeClusters(glyphList, noSuch); String candidateType = getClassificationOfTshegBar(gcs, warnings, false); int sz = gcs.size(); if (candidateType == "invalid" || candidateType == "single-sanskrit-gc") { // Forget beauty and succintness -- just be sure to - // generate Wylie that can be converted unambiguously into - // Tibetan. Use a disambiguator or vowel after each - // grapheme cluster. + // generate transliteration that can be converted + // unambiguously into Tibetan. Use a disambiguator or + // vowel after each grapheme cluster. // // If we truly didn't care about beauty, we'd just lump // SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into @@ -1244,19 +1258,20 @@ public class TibTextUtils implements THDLWylieConstants { TGCPair tp = (TGCPair)gcs.get(i); int cls = tp.classification; String wylie = tp.getWylie(); - wylieBuffer.append(wylie); + String translit = (EWTSNotACIP) ? wylie : tp.getACIP(); + translitBuffer.append(translit); if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie) || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) { - wylieBuffer.append(aVowelToUseAfter(wylie)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie)); } else { if (TGCPair.CONSONANTAL_WITH_VOWEL != cls && TGCPair.SANSKRIT_WITH_VOWEL != cls) - wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY); + translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-'); } } } else { - // Generate perfect, beautiful, Wylie, using the minimum - // number of vowels and disambiguators. + // Generate perfect, beautiful transliteration, using the + // minimum number of vowels and disambiguators. 
int leftover = sz + 1; @@ -1299,23 +1314,44 @@ public class TibTextUtils implements THDLWylieConstants { String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); String wylie3 = ((TGCPair)gcs.get(2)).getWylie(); + String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP(); + String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP(); + String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP(); if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s"))) || (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m"))) || (wylie1.equals("b") && wylie2.equals("d")) || (wylie1.equals("m") && wylie2.equals("d")) || (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) { if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) - wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + if (EWTSNotACIP) + translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + else + translitBuffer.append(acip1 + '-' + acip2); else - wylieBuffer.append(wylie1 + wylie2); + if (EWTSNotACIP) + translitBuffer.append(wylie1 + wylie2); + else + translitBuffer.append(acip1 + acip2); - wylieBuffer.append(aVowelToUseAfter(wylie2) - + wylie3); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2) + + (EWTSNotACIP ? 
wylie3 : acip3)); } else { - wylieBuffer.append(wylie1 - + aVowelToUseAfter(wylie1) - + unambiguousPostAVowelWylie(wylie2, - wylie3)); + if (EWTSNotACIP) + translitBuffer.append(wylie1 + + aVowelToUseAfter(EWTSNotACIP, wylie1) + + unambiguousPostAVowelTranslit(EWTSNotACIP, + wylie2, + wylie3, + acip2, + acip3)); + else + translitBuffer.append(acip1 + + aVowelToUseAfter(EWTSNotACIP, wylie1) + + unambiguousPostAVowelTranslit(EWTSNotACIP, + wylie2, + wylie3, + acip2, + acip3)); } } else if ("root" == candidateType || "prefix/root-root/suffix" == candidateType @@ -1323,13 +1359,14 @@ public class TibTextUtils implements THDLWylieConstants { || "root-suffix-postsuffix" == candidateType || "root-suffix" == candidateType) { String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); + String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP(); leftover = 1; - wylieBuffer.append(wylie1); + translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1); if (((TGCPair)gcs.get(0)).classification != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(0)).classification); - wylieBuffer.append(aVowelToUseAfter(wylie1)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1)); if (debug) System.out.println("DEBUG: appending vowel"); } else { if (debug) System.out.println("DEBUG: already has vowel 2"); @@ -1338,26 +1375,39 @@ public class TibTextUtils implements THDLWylieConstants { leftover = 3; String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); String wylie3 = ((TGCPair)gcs.get(2)).getWylie(); - wylieBuffer.append(unambiguousPostAVowelWylie(wylie2, - wylie3)); + String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP(); + String acip3 = (EWTSNotACIP) ? 
null : ((TGCPair)gcs.get(2)).getACIP(); + translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP, + wylie2, + wylie3, + acip2, + acip3)); } } else if ("prefix-root-suffix" == candidateType || "prefix-root" == candidateType || "prefix-root-suffix-postsuffix" == candidateType) { String wylie1 = ((TGCPair)gcs.get(0)).getWylie(); String wylie2 = ((TGCPair)gcs.get(1)).getWylie(); + String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP(); + String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP(); leftover = 2; if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2)) - wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + if (EWTSNotACIP) + translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2); + else + translitBuffer.append(acip1 + '-' + acip2); else - wylieBuffer.append(wylie1 + wylie2); + if (EWTSNotACIP) + translitBuffer.append(wylie1 + wylie2); + else + translitBuffer.append(acip1 + acip2); if (((TGCPair)gcs.get(1)).classification != TGCPair.CONSONANTAL_WITH_VOWEL) { ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL == ((TGCPair)gcs.get(1)).classification); if (debug) System.out.println("DEBUG: appending vowel"); - wylieBuffer.append(aVowelToUseAfter(wylie2)); + translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)); } else { if (debug) System.out.println("DEBUG: already has vowel 1"); } @@ -1365,8 +1415,13 @@ public class TibTextUtils implements THDLWylieConstants { leftover = 4; String wylie3 = ((TGCPair)gcs.get(2)).getWylie(); String wylie4 = ((TGCPair)gcs.get(3)).getWylie(); - wylieBuffer.append(unambiguousPostAVowelWylie(wylie3, - wylie4)); + String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP(); + String acip4 = (EWTSNotACIP) ? 
null : ((TGCPair)gcs.get(3)).getACIP(); + translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP, + wylie3, + wylie4, + acip3, + acip4)); } } else if ("number" == candidateType) { leftover = 0; @@ -1374,18 +1429,17 @@ public class TibTextUtils implements THDLWylieConstants { throw new Error("missed a case down here"); } - // append the wylie left over: + // append the wylie/ACIP left over: for (int i = leftover; i < sz; i++) { TGCPair tp = (TGCPair)gcs.get(i); - String wylie = tp.getWylie(); - wylieBuffer.append(wylie); + translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP()); } } } /** -* Gets the Extended Wylie for a sequence of glyphs. This works as -* follows: +* Gets the Extended Wylie/ACIP for a sequence of glyphs. This works +* as follows: * *

We run along until we hit whitespace or punctuation. We take * everything before that and we see if it's a legal Tibetan tsheg bar, @@ -1393,22 +1447,25 @@ public class TibTextUtils implements THDLWylieConstants { * vowel in the correct place. If not, then we throw a disambiguating * key or a vowel after each stack. * +* @param EWTSNotACIP true if you want THDL Extended Wylie, false if +* you want ACIP * @param dcs an array of glyphs -* @param noSuchWylie an array which will not be touched if this is -* successful; however, if there is no THDL Extended Wylie -* corresponding to these glyphs, then noSuchWylie[0] will be set to -* true +* @param noSuch an array which will not be touched if this is +* successful; however, if there is no THDL Extended Wylie/ACIP +* corresponding to these glyphs, then noSuch[0] will be set to true * @param warnings either null or a buffer to which will be appended * warnings about illegal tsheg bars -* @return the Extended Wylie corresponding to these glyphs, or null */ - public static String getWylieImplementation(DuffCode[] dcs, - boolean noSuchWylie[], - StringBuffer warnings) { +* @return the Extended Wylie/ACIP corresponding to these glyphs, or +* null */ + private static String getTranslitImplementation(boolean EWTSNotACIP, + DuffCode[] dcs, + boolean noSuch[], + StringBuffer warnings) { if (dcs.length == 0) return null; ArrayList glyphList = new ArrayList(); - StringBuffer wylieBuffer = new StringBuffer(); + StringBuffer translitBuffer = new StringBuffer(); for (int i=0; i 0) - return wylieBuffer.toString(); + if (translitBuffer.length() > 0) + return translitBuffer.toString(); else return null; } diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index 48bca39..cbd7198 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -294,6 +294,18 @@ public class TibetanDocument extends DefaultStyledDocument { return 
getWylie(0, getLength(), noSuchWylie); } +/** +* Converts the entire document into ACIP. If the document consists of +* both Tibetan and non-Tibetan fonts, however, the conversion stops at +* the first non-Tibetan font. +* @param noSuchACIP an array which will not be touched if this is +* successful; however, if there is no ACIP corresponding to one of +* these glyphs, then noSuchACIP[0] will be set to true +* @return the string of ACIP corresponding to this document */ + public String getACIP(boolean noSuchACIP[]) { + return getACIP(0, getLength(), noSuchACIP); + } + /** * Converts a portion of the document into Extended Wylie. * If the document consists of both Tibetan and @@ -306,7 +318,25 @@ public class TibetanDocument extends DefaultStyledDocument { * corresponding to one of these glyphs, then noSuchWylie[0] will be * set to true * @return the string of Wylie corresponding to this document */ - public String getWylie(int begin, int end, boolean noSuchWylie[]) { + public String getWylie(int begin, int end, boolean noSuchWylie[]) { + return getTranslit(true, begin, end, noSuchWylie); + } + +/** +* Converts a portion of the document into ACIP. If the document +* consists of both Tibetan and non-Tibetan fonts, however, the +* conversion stops at the first non-Tibetan font. 
+* @param begin the beginning of the region to convert +* @param end the end of the region to convert +* @param noSuchACIP an array which will not be touched if this is +* successful; however, if there is no ACIP corresponding to one of +* these glyphs, then noSuchACIP[0] will be set to true +* @return the string of ACIP corresponding to this document */ + public String getACIP(int begin, int end, boolean noSuchACIP[]) { + return getTranslit(false, begin, end, noSuchACIP); + } + + private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) { AttributeSet attr; String fontName; int fontNum; @@ -318,7 +348,7 @@ public class TibetanDocument extends DefaultStyledDocument { java.util.List dcs = new ArrayList(); int i = begin; - StringBuffer wylieBuffer = new StringBuffer(); + StringBuffer translitBuffer = new StringBuffer(); try { while (i < end) { @@ -332,10 +362,10 @@ public class TibetanDocument extends DefaultStyledDocument { if (dcs.size() > 0) { DuffCode[] dc_array = new DuffCode[0]; dc_array = (DuffCode[])dcs.toArray(dc_array); - wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); + translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); dcs.clear(); } - wylieBuffer.append(ch); + translitBuffer.append(ch); } //current character isn't TMW @@ -343,7 +373,7 @@ public class TibetanDocument extends DefaultStyledDocument { if (dcs.size() > 0) { DuffCode[] dc_array = new DuffCode[0]; dc_array = (DuffCode[])dcs.toArray(dc_array); - wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); + translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch)); dcs.clear(); } } @@ -358,9 +388,9 @@ public class TibetanDocument extends DefaultStyledDocument { if (dcs.size() > 0) { DuffCode[] dc_array = new DuffCode[0]; dc_array = (DuffCode[])dcs.toArray(dc_array); - wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie)); + translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, 
dc_array, noSuch)); } - return wylieBuffer.toString(); + return translitBuffer.toString(); } catch (BadLocationException ble) { ble.printStackTrace(); @@ -1101,6 +1131,17 @@ public class TibetanDocument extends DefaultStyledDocument { * DuffCode..." text into the document */ public boolean toWylie(int start, int end, long numAttemptedReplacements[]) { + return toTranslit(true, start, end, numAttemptedReplacements); + } + + // DLC DOC just like {@link #toWylie(int,int,long[])} + public boolean toACIP(int start, int end, + long numAttemptedReplacements[]) { + return toTranslit(false, start, end, numAttemptedReplacements); + } + + private boolean toTranslit(boolean EWTSNotACIP, int start, int end, + long numAttemptedReplacements[]) { if (start >= end) return true; @@ -1124,7 +1165,9 @@ public class TibetanDocument extends DefaultStyledDocument { remove(start, i-start); ThdlDebug.verify(getRomanAttributeSet() != null); insertString(start, - TibTextUtils.getWylie(dc_array, noSuchWylie), + TibTextUtils.getTranslit(EWTSNotACIP, + dc_array, + noSuchWylie), getRomanAttributeSet()); dcs.clear(); } diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java index f0d3807..52304dc 100644 --- a/source/org/thdl/tib/text/TibetanMachineWeb.java +++ b/source/org/thdl/tib/text/TibetanMachineWeb.java @@ -1706,6 +1706,13 @@ public static String wylieForGlyph(String hashKey) { return sb.toString(); } + // DLC DOC +private static String acipForGlyph(String hashKey) { + String ACIP // DLC FIXME: test this. + = org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey); + return ACIP; +} + /** Error that appears in a document when some TMW cannot be * transcribed in THDL Extended Wylie. This error message is * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change @@ -1716,6 +1723,16 @@ private static String getTMWToWylieErrorString(DuffCode dc) { + " to THDL Extended Wylie. 
Please see the documentation for the TMW font and transcribe this yourself.]]>>"; } +/** Error that appears in a document when some TMW cannot be + * transcribed in ACIP. This error message is + * documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change + * them both when you change this. */ +private static String getTMWToACIPErrorString(DuffCode dc) { + return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode " + + dc.toString(true) + + " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>"; +} + /** * Gets the Extended Wylie value for this glyph. * @param font the font of the TibetanMachineWeb @@ -1756,6 +1773,17 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) { return wylieForGlyph(hashKey); } +// DLC DOC +public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) { + String hashKey = getHashKeyForGlyph(dc); + String ans = (hashKey == null) ? null : acipForGlyph(hashKey); + if (hashKey == null || ans == null) { + noSuchACIP[0] = true; + return getTMWToACIPErrorString(dc); + } + return ans; +} + /** This addresses bug 624133, "Input freezes after impossible * character". Returns true iff s is a proper prefix of some * legal input for this keyboard. In the extended Wylie