From 316f59107bd63684743ea2455a4395a1f78c4a83 Mon Sep 17 00:00:00 2001
From: dchandler
Date: Tue, 2 Sep 2003 06:39:33 +0000
Subject: [PATCH] A preliminary TMW->ACIP converter is here. There are known
bugs, mostly with rare punctuation.
---
source/org/thdl/tib/input/ConvertDialog.java | 2 +
source/org/thdl/tib/input/ConverterGUI.java | 6 +
.../tib/input/FontConverterConstants.java | 3 +
.../org/thdl/tib/input/TibetanConverter.java | 16 +-
source/org/thdl/tib/scanner/Manipulate.java | 11 +
source/org/thdl/tib/text/TGCPair.java | 24 ++
source/org/thdl/tib/text/TibTextUtils.java | 217 +++++++++++-------
source/org/thdl/tib/text/TibetanDocument.java | 59 ++++-
.../org/thdl/tib/text/TibetanMachineWeb.java | 28 +++
9 files changed, 278 insertions(+), 88 deletions(-)
diff --git a/source/org/thdl/tib/input/ConvertDialog.java b/source/org/thdl/tib/input/ConvertDialog.java
index afbdafa..4bba4cd 100644
--- a/source/org/thdl/tib/input/ConvertDialog.java
+++ b/source/org/thdl/tib/input/ConvertDialog.java
@@ -417,6 +417,8 @@ class ConvertDialog extends JDialog
} else { // conversion {to Wylie or TM} mode
if (TMW_TO_WYLIE == ct) {
newFileNamePrefix = suggested_WYLIE_prefix;
+ } else if (TMW_TO_ACIP == ct) {
+ newFileNamePrefix = suggested_ACIP_prefix;
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
newFileNamePrefix = suggested_TO_UNI_prefix;
} else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {
diff --git a/source/org/thdl/tib/input/ConverterGUI.java b/source/org/thdl/tib/input/ConverterGUI.java
index bd5e701..097ad37 100644
--- a/source/org/thdl/tib/input/ConverterGUI.java
+++ b/source/org/thdl/tib/input/ConverterGUI.java
@@ -78,6 +78,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
"Attention required",
JOptionPane.ERROR_MESSAGE);
return false;
+ } else if (49 == returnCode) {
+ JOptionPane.showMessageDialog(cd,
+ "Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP:\n Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct ACIP transliteration.",
+ "Attention required",
+ JOptionPane.ERROR_MESSAGE);
+ return false;
} else if (43 == returnCode) {
JOptionPane.showMessageDialog(cd,
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
diff --git a/source/org/thdl/tib/input/FontConverterConstants.java b/source/org/thdl/tib/input/FontConverterConstants.java
index fa25303..3d695f7 100644
--- a/source/org/thdl/tib/input/FontConverterConstants.java
+++ b/source/org/thdl/tib/input/FontConverterConstants.java
@@ -31,6 +31,7 @@ interface FontConverterConstants
final String TM_TO_TMW = "TM to TMW";
final String TMW_TO_UNI = "TMW to Unicode";
final String TMW_TO_WYLIE = "TMW to Wylie";
+ final String TMW_TO_ACIP = "TMW to ACIP";
final String TMW_TO_TM = "TMW to TM";
final String FIND_SOME_NON_TMW = "Find some non-TMW";
final String FIND_SOME_NON_TM = "Find some non-TM";
@@ -43,6 +44,7 @@ interface FontConverterConstants
TM_TO_TMW,
TMW_TO_UNI,
TMW_TO_WYLIE,
+ TMW_TO_ACIP,
TMW_TO_TM,
FIND_SOME_NON_TMW,
FIND_SOME_NON_TM,
@@ -51,6 +53,7 @@ interface FontConverterConstants
};
final String suggested_WYLIE_prefix = "THDL_Wylie_";
+ final String suggested_ACIP_prefix = "ACIP_";
final String suggested_TO_TMW_prefix = "TMW_";
final String suggested_TO_UNI_prefix = "Uni_";
final String suggested_TO_TM_prefix = "TM_";
diff --git a/source/org/thdl/tib/input/TibetanConverter.java b/source/org/thdl/tib/input/TibetanConverter.java
index d62d292..9379179 100644
--- a/source/org/thdl/tib/input/TibetanConverter.java
+++ b/source/org/thdl/tib/input/TibetanConverter.java
@@ -74,6 +74,7 @@ public class TibetanConverter implements FontConverterConstants {
boolean convertACIPToTMWMode = false;
boolean convertToTMWMode = false;
boolean convertToWylieMode = false;
+ boolean convertToACIPMode = false;
boolean findSomeNonTMWMode = false;
boolean findAllNonTMWMode = false;
boolean findSomeNonTMMode = false;
@@ -98,6 +99,8 @@ public class TibetanConverter implements FontConverterConstants {
= args[0].equals("--to-unicode"))
|| (convertToWylieMode
= args[0].equals("--to-wylie"))
+ || (convertToACIPMode
+ = args[0].equals("--to-acip"))
|| (findSomeNonTMWMode
= args[0].equals("--find-some-non-tmw"))
|| (findSomeNonTMMode
@@ -107,7 +110,7 @@ public class TibetanConverter implements FontConverterConstants {
))) {
out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
- out.println(" | --to-unicode | --to-wylie] RTF_file");
+ out.println(" | --to-unicode | --to-wylie | --to-acip] RTF_file");
out.println(" | TibetanConverter --acip-to-unicode TXT_file");
out.println(" | TibetanConverter [--version | -v | --help | -h]");
out.println("");
@@ -120,6 +123,7 @@ public class TibetanConverter implements FontConverterConstants {
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
+ out.println(" --to-acip to convert TibetanMachineWeb to ACIP");
out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
out.println(" --find-all-non-tmw to locate all characters in the input document that are");
out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found");
@@ -177,6 +181,8 @@ public class TibetanConverter implements FontConverterConstants {
} else { // conversion {to Wylie or TM} mode
if (convertToWylieMode) {
conversionTag = TMW_TO_WYLIE;
+ } else if (convertToACIPMode) {
+ conversionTag = TMW_TO_ACIP;
} else if (convertToUnicodeMode) {
conversionTag = TMW_TO_UNI;
} else if (convertToTMWMode) {
@@ -311,6 +317,7 @@ public class TibetanConverter implements FontConverterConstants {
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
+ ((TMW_TO_UNI == ct) ? 1 : 0)
+ ((TM_TO_TMW == ct) ? 1 : 0)
+ + ((TMW_TO_ACIP == ct) ? 1 : 0)
+ ((TMW_TO_WYLIE == ct) ? 1 : 0)
== 1);
long numAttemptedReplacements[] = new long[] { 0 };
@@ -321,6 +328,13 @@ public class TibetanConverter implements FontConverterConstants {
numAttemptedReplacements)) {
exitCode = 44;
}
+ } else if (TMW_TO_ACIP == ct) {
+ // Convert to ACIP:
+ if (!tdoc.toACIP(0,
+ tdoc.getLength(),
+ numAttemptedReplacements)) {
+ exitCode = 49;
+ }
} else if (TMW_TO_UNI == ct) {
StringBuffer errors = new StringBuffer();
// Convert to Unicode:
diff --git a/source/org/thdl/tib/scanner/Manipulate.java b/source/org/thdl/tib/scanner/Manipulate.java
index 11a3fae..1d7571f 100644
--- a/source/org/thdl/tib/scanner/Manipulate.java
+++ b/source/org/thdl/tib/scanner/Manipulate.java
@@ -44,8 +44,13 @@ public class Manipulate
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
}
+ /** Returns null on error. */
public static String wylieToAcip(String palabra)
{
+ // DLC FIXME: for unknown things, return null.
+ if (palabra.equals("@#")) return "*";
+ if (palabra.startsWith("@") || palabra.startsWith("#"))
+ return null; // we can't convert this in isolation! We need context.
char []caract;
int i, j, len;
String nuevaPalabra;
@@ -83,6 +88,12 @@ public class Manipulate
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
+ nuevaPalabra = replace(nuevaPalabra, "a", "'A");
+ nuevaPalabra = replace(nuevaPalabra, "i", "'I");
+ nuevaPalabra = replace(nuevaPalabra, "u", "'U");
+ nuevaPalabra = replace(nuevaPalabra, "-I", "i");
+ nuevaPalabra = replace(nuevaPalabra, "/", ",");
+ nuevaPalabra = replace(nuevaPalabra, "_", " ");
nuevaPalabra = fixWazur(nuevaPalabra);
return nuevaPalabra;
}
diff --git a/source/org/thdl/tib/text/TGCPair.java b/source/org/thdl/tib/text/TGCPair.java
index fe3d305..8605de4 100644
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@@ -86,6 +86,30 @@ public class TGCPair {
b.append(vowelWylie);
return b.toString();
}
+    /** Returns the ACIP for this pair: the consonant's ACIP with every '-'
+     *  removed, followed by the vowel's ACIP; null parts are skipped. */
+    public String getACIP() {
+        // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
+        StringBuffer b = new StringBuffer();
+        if (consonantWylie != null) {
+            String consonantACIP // DLC FIXME can KAsh occur?
+                = org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
+            if (null == consonantACIP) throw new Error("no ACIP for consonant Wylie \"" + consonantWylie + "\"");
+            // we may have {P-Y}, but the user wants to see {PY}.
+            for (int i = 0; i < consonantACIP.length(); i++) {
+                char ch = consonantACIP.charAt(i);
+                if ('-' != ch)
+                    b.append(ch);
+            }
+        }
+        if (vowelWylie != null) {
+            String vowelACIP // DLC FIXME look for exceptions
+                = org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
+            if (null == vowelACIP) throw new Error("no ACIP for vowel Wylie \"" + vowelWylie + "\"");
+            b.append(vowelACIP);
+        }
+        return b.toString();
+    }
public int classification;
/** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
* consonantWylie and vowel vowelWylie. Use
diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java
index a191c9c..a83df5e 100644
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -786,37 +786,50 @@ public class TibTextUtils implements THDLWylieConstants {
private static final boolean makeIllegalTibetanGoEndToEnd = true;
- /** Returns "a", unless wylie is already "a". */
- private static String aVowelToUseAfter(String wylie) {
+ /** Returns the implicit vowel to append after wylie ("a" for EWTS, "A" for ACIP),
+ or "" if wylie (which is always EWTS, never ACIP) is already "a". */
+ private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
if (wylie.equals(ACHEN))
return "";
else
- return WYLIE_aVOWEL;
+ return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
}
- private static String unambiguousPostAVowelWylie(String wylie1,
- String wylie2) {
+ private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP,
+ String wylie1,
+ String wylie2,
+ String acip1,
+ String acip2) {
String disambiguator = "";
// type "lard" vs. "lar.d", and you'll see the need for this
// disambiguation of suffix and postsuffix. sa doesn't take
// any head letters, so only da needs to be considered.
if (TibetanMachineWeb.isWylieTop(wylie1)
&& wylie2.equals(/* FIXME: hard-coded */ "d"))
- disambiguator = WYLIE_DISAMBIGUATING_KEY_STRING;
- return wylie1 + disambiguator + wylie2;
+ disambiguator = (EWTSNotACIP) ? WYLIE_DISAMBIGUATING_KEY_STRING : "-";
+ if (EWTSNotACIP)
+ return wylie1 + disambiguator + wylie2;
+ else
+ return acip1 + disambiguator + acip2;
}
/**
-* Gets the Extended Wylie for a sequence of glyphs.
+* Gets the Extended Wylie for the given sequence of glyphs if
+* EWTSNotACIP is true, or the ACIP otherwise.
+* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
+* you want ACIP
* @param dcs an array of glyphs
-* @param noSuchWylie an array which will not be touched if this is
-* successful; however, if there is no THDL Extended Wylie
-* corresponding to these glyphs, then noSuchWylie[0] will be set to
-* true
-* @return the Extended Wylie corresponding to these glyphs, or null */
- public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) {
+* @param noSuch an array which will not be touched if this is
+* successful; however, if there is no THDL Extended Wylie/ACIP
+* corresponding to these glyphs, then noSuch[0] will be set to true
+* @return the Extended Wylie/ACIP corresponding to these glyphs, or
+* null */
+ public static String getTranslit(boolean EWTSNotACIP,
+ DuffCode[] dcs,
+ boolean noSuch[]) {
StringBuffer warnings = (debug ? new StringBuffer() : null);
- String ans = getWylieImplementation(dcs, noSuchWylie, warnings);
+ String ans
+ = getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
if (debug && warnings.length() > 0)
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
return ans;
@@ -1172,13 +1185,13 @@ public class TibTextUtils implements THDLWylieConstants {
return candidateType;
}
- /** Appends to wylieBuffer the wylie for the glyph list glyphList
- (which should be an ArrayList for speed). This will be very
- user-friendly for "legal tsheg bars" and will be valid, but
- possibly ugly (interspersed with disambiguators or extra
- vowels, etc.) Wylie for other things, such as Sanskrit
- transliteration. Updates warnings and noSuchWylie like the
- caller does.
+ /** Appends to translitBuffer the EWTS/ACIP for the glyph list
+ glyphList (which should be an ArrayList for speed). This will
+ be very user-friendly for "legal tsheg bars" and will be
+ valid, but possibly ugly (interspersed with disambiguators or
+ extra vowels, etc.) Wylie/ACIP for other things, such as
+ Sanskrit transliteration. Updates warnings and noSuch like
+ the caller does.
What constitutes a legal, non-punctuation, non-whitespace
tsheg bar? The following are the only such:
@@ -1219,22 +1232,23 @@ public class TibTextUtils implements THDLWylieConstants {
When there are three unadorned consonant stacks in a
tyllable, a hard-coded list of valid Tibetan tsheg bars is
- relied upon to determine if the 'a' vowel comes after the
- first or the second consonant.
*/
- private static void getTshegBarWylie(java.util.List glyphList,
- boolean noSuchWylie[],
- StringBuffer warnings,
- StringBuffer wylieBuffer) {
+ relied upon to determine if the 'a'/'A' vowel comes after
+ the first or the second consonant.
*/
+ private static void getTshegBarTranslit(boolean EWTSNotACIP,
+ java.util.List glyphList,
+ boolean noSuch[],
+ StringBuffer warnings,
+ StringBuffer translitBuffer) {
TGCList gcs
- = breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie);
+ = breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
int sz = gcs.size();
if (candidateType == "invalid"
|| candidateType == "single-sanskrit-gc") {
// Forget beauty and succintness -- just be sure to
- // generate Wylie that can be converted unambiguously into
- // Tibetan. Use a disambiguator or vowel after each
- // grapheme cluster.
+ // generate transliteration that can be converted
+ // unambiguously into Tibetan. Use a disambiguator or
+ // vowel after each grapheme cluster.
//
// If we truly didn't care about beauty, we'd just lump
// SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into
@@ -1244,19 +1258,20 @@ public class TibTextUtils implements THDLWylieConstants {
TGCPair tp = (TGCPair)gcs.get(i);
int cls = tp.classification;
String wylie = tp.getWylie();
- wylieBuffer.append(wylie);
+ String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
+ translitBuffer.append(translit);
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
- wylieBuffer.append(aVowelToUseAfter(wylie));
+ translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
} else {
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
- wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY);
+ translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
}
}
} else {
- // Generate perfect, beautiful, Wylie, using the minimum
- // number of vowels and disambiguators.
+ // Generate perfect, beautiful transliteration, using the
+ // minimum number of vowels and disambiguators.
int leftover = sz + 1;
@@ -1299,23 +1314,44 @@ public class TibTextUtils implements THDLWylieConstants {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
+ String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
+ String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
+ String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
|| (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
|| (wylie1.equals("b") && wylie2.equals("d"))
|| (wylie1.equals("m") && wylie2.equals("d"))
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
- wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+ if (EWTSNotACIP)
+ translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+ else
+ translitBuffer.append(acip1 + '-' + acip2);
else
- wylieBuffer.append(wylie1 + wylie2);
+ if (EWTSNotACIP)
+ translitBuffer.append(wylie1 + wylie2);
+ else
+ translitBuffer.append(acip1 + acip2);
- wylieBuffer.append(aVowelToUseAfter(wylie2)
- + wylie3);
+ translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
+ + (EWTSNotACIP ? wylie3 : acip3));
} else {
- wylieBuffer.append(wylie1
- + aVowelToUseAfter(wylie1)
- + unambiguousPostAVowelWylie(wylie2,
- wylie3));
+ if (EWTSNotACIP)
+ translitBuffer.append(wylie1
+ + aVowelToUseAfter(EWTSNotACIP, wylie1)
+ + unambiguousPostAVowelTranslit(EWTSNotACIP,
+ wylie2,
+ wylie3,
+ acip2,
+ acip3));
+ else
+ translitBuffer.append(acip1
+ + aVowelToUseAfter(EWTSNotACIP, wylie1)
+ + unambiguousPostAVowelTranslit(EWTSNotACIP,
+ wylie2,
+ wylie3,
+ acip2,
+ acip3));
}
} else if ("root" == candidateType
|| "prefix/root-root/suffix" == candidateType
@@ -1323,13 +1359,14 @@ public class TibTextUtils implements THDLWylieConstants {
|| "root-suffix-postsuffix" == candidateType
|| "root-suffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
+ String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
leftover = 1;
- wylieBuffer.append(wylie1);
+ translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
if (((TGCPair)gcs.get(0)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(0)).classification);
- wylieBuffer.append(aVowelToUseAfter(wylie1));
+ translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
if (debug) System.out.println("DEBUG: appending vowel");
} else {
if (debug) System.out.println("DEBUG: already has vowel 2");
@@ -1338,26 +1375,39 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 3;
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
- wylieBuffer.append(unambiguousPostAVowelWylie(wylie2,
- wylie3));
+ String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
+ String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
+ translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
+ wylie2,
+ wylie3,
+ acip2,
+ acip3));
}
} else if ("prefix-root-suffix" == candidateType
|| "prefix-root" == candidateType
|| "prefix-root-suffix-postsuffix" == candidateType) {
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
+ String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
+ String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
leftover = 2;
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
- wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+ if (EWTSNotACIP)
+ translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
+ else
+ translitBuffer.append(acip1 + '-' + acip2);
else
- wylieBuffer.append(wylie1 + wylie2);
+ if (EWTSNotACIP)
+ translitBuffer.append(wylie1 + wylie2);
+ else
+ translitBuffer.append(acip1 + acip2);
if (((TGCPair)gcs.get(1)).classification
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
== ((TGCPair)gcs.get(1)).classification);
if (debug) System.out.println("DEBUG: appending vowel");
- wylieBuffer.append(aVowelToUseAfter(wylie2));
+ translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
} else {
if (debug) System.out.println("DEBUG: already has vowel 1");
}
@@ -1365,8 +1415,13 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 4;
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
- wylieBuffer.append(unambiguousPostAVowelWylie(wylie3,
- wylie4));
+ String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
+ String acip4 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(3)).getACIP();
+ translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
+ wylie3,
+ wylie4,
+ acip3,
+ acip4));
}
} else if ("number" == candidateType) {
leftover = 0;
@@ -1374,18 +1429,17 @@ public class TibTextUtils implements THDLWylieConstants {
throw new Error("missed a case down here");
}
- // append the wylie left over:
+ // append the wylie/ACIP left over:
for (int i = leftover; i < sz; i++) {
TGCPair tp = (TGCPair)gcs.get(i);
- String wylie = tp.getWylie();
- wylieBuffer.append(wylie);
+ translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
}
}
}
/**
-* Gets the Extended Wylie for a sequence of glyphs. This works as
-* follows:
+* Gets the Extended Wylie/ACIP for a sequence of glyphs. This works
+* as follows:
*
* We run along until we hit whitespace or punctuation. We take
* everything before that and we see if it's a legal Tibetan tsheg bar,
@@ -1393,22 +1447,25 @@ public class TibTextUtils implements THDLWylieConstants {
* vowel in the correct place. If not, then we throw a disambiguating
* key or a vowel after each stack.
*
+* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
+* you want ACIP
* @param dcs an array of glyphs
-* @param noSuchWylie an array which will not be touched if this is
-* successful; however, if there is no THDL Extended Wylie
-* corresponding to these glyphs, then noSuchWylie[0] will be set to
-* true
+* @param noSuch an array which will not be touched if this is
+* successful; however, if there is no THDL Extended Wylie/ACIP
+* corresponding to these glyphs, then noSuch[0] will be set to true
* @param warnings either null or a buffer to which will be appended
* warnings about illegal tsheg bars
-* @return the Extended Wylie corresponding to these glyphs, or null */
- public static String getWylieImplementation(DuffCode[] dcs,
- boolean noSuchWylie[],
- StringBuffer warnings) {
+* @return the Extended Wylie/ACIP corresponding to these glyphs, or
+* null */
+ private static String getTranslitImplementation(boolean EWTSNotACIP,
+ DuffCode[] dcs,
+ boolean noSuch[],
+ StringBuffer warnings) {
if (dcs.length == 0)
return null;
ArrayList glyphList = new ArrayList();
- StringBuffer wylieBuffer = new StringBuffer();
+ StringBuffer translitBuffer = new StringBuffer();
for (int i=0; i 0)
- return wylieBuffer.toString();
+ if (translitBuffer.length() > 0)
+ return translitBuffer.toString();
else
return null;
}
diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java
index 48bca39..cbd7198 100644
--- a/source/org/thdl/tib/text/TibetanDocument.java
+++ b/source/org/thdl/tib/text/TibetanDocument.java
@@ -294,6 +294,18 @@ public class TibetanDocument extends DefaultStyledDocument {
return getWylie(0, getLength(), noSuchWylie);
}
+/**
+* Converts the entire document, from offset 0 to getLength(), into
+* ACIP. If the document consists of both Tibetan and non-Tibetan
+* fonts, however, the conversion stops at the first non-Tibetan font.
+* @param noSuchACIP an array which will not be touched if this is
+* successful; however, if there is no ACIP corresponding to one of
+* these glyphs, then noSuchACIP[0] will be set to true
+* @return the string of ACIP corresponding to this document */
+ public String getACIP(boolean noSuchACIP[]) {
+ return getACIP(0, getLength(), noSuchACIP);
+ }
+
/**
* Converts a portion of the document into Extended Wylie.
* If the document consists of both Tibetan and
@@ -306,7 +318,25 @@ public class TibetanDocument extends DefaultStyledDocument {
* corresponding to one of these glyphs, then noSuchWylie[0] will be
* set to true
* @return the string of Wylie corresponding to this document */
- public String getWylie(int begin, int end, boolean noSuchWylie[]) {
+ public String getWylie(int begin, int end, boolean noSuchWylie[]) {
+ return getTranslit(true, begin, end, noSuchWylie);
+ }
+
+/**
+* Converts a portion of the document into ACIP. If the document
+* consists of both Tibetan and non-Tibetan fonts, however, the
+* conversion stops at the first non-Tibetan font.
+* @param begin the beginning of the region to convert
+* @param end the end of the region to convert
+* @param noSuchACIP an array which will not be touched if this is
+* successful; however, if there is no ACIP corresponding to one of
+* these glyphs, then noSuchACIP[0] will be set to true
+* @return the string of ACIP corresponding to this document */
+    public String getACIP(int begin, int end, boolean noSuchACIP[]) {
+        return getTranslit(false, begin, end, noSuchACIP); // false selects ACIP; true would yield Extended Wylie
+    }
+
+ private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
AttributeSet attr;
String fontName;
int fontNum;
@@ -318,7 +348,7 @@ public class TibetanDocument extends DefaultStyledDocument {
java.util.List dcs = new ArrayList();
int i = begin;
- StringBuffer wylieBuffer = new StringBuffer();
+ StringBuffer translitBuffer = new StringBuffer();
try {
while (i < end) {
@@ -332,10 +362,10 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
- wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
+ translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear();
}
- wylieBuffer.append(ch);
+ translitBuffer.append(ch);
}
//current character isn't TMW
@@ -343,7 +373,7 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
- wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
+ translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
dcs.clear();
}
}
@@ -358,9 +388,9 @@ public class TibetanDocument extends DefaultStyledDocument {
if (dcs.size() > 0) {
DuffCode[] dc_array = new DuffCode[0];
dc_array = (DuffCode[])dcs.toArray(dc_array);
- wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
+ translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
}
- return wylieBuffer.toString();
+ return translitBuffer.toString();
}
catch (BadLocationException ble) {
ble.printStackTrace();
@@ -1101,6 +1131,17 @@ public class TibetanDocument extends DefaultStyledDocument {
* DuffCode..." text into the document */
public boolean toWylie(int start, int end,
long numAttemptedReplacements[]) {
+ return toTranslit(true, start, end, numAttemptedReplacements);
+ }
+
+ /** Like {@link #toWylie(int,int,long[])}, but converts to ACIP: delegates to toTranslit with EWTSNotACIP false. */
+ public boolean toACIP(int start, int end,
+ long numAttemptedReplacements[]) {
+ return toTranslit(false, start, end, numAttemptedReplacements);
+ }
+
+ private boolean toTranslit(boolean EWTSNotACIP, int start, int end,
+ long numAttemptedReplacements[]) {
if (start >= end)
return true;
@@ -1124,7 +1165,9 @@ public class TibetanDocument extends DefaultStyledDocument {
remove(start, i-start);
ThdlDebug.verify(getRomanAttributeSet() != null);
insertString(start,
- TibTextUtils.getWylie(dc_array, noSuchWylie),
+ TibTextUtils.getTranslit(EWTSNotACIP,
+ dc_array,
+ noSuchWylie),
getRomanAttributeSet());
dcs.clear();
}
diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java
index f0d3807..52304dc 100644
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@@ -1706,6 +1706,13 @@ public static String wylieForGlyph(String hashKey) {
return sb.toString();
}
+ /** Returns Manipulate.wylieToAcip(hashKey), which is null on error. NOTE(review): hashKey is passed as if it were Wylie -- confirm. */
+private static String acipForGlyph(String hashKey) {
+ String ACIP // DLC FIXME: test this.
+ = org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
+ return ACIP;
+}
+
/** Error that appears in a document when some TMW cannot be
* transcribed in THDL Extended Wylie. This error message is
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
@@ -1716,6 +1723,16 @@ private static String getTMWToWylieErrorString(DuffCode dc) {
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
}
+/** Builds the marker text that appears in a document in place of
+ *  TMW that cannot be transcribed in ACIP. This error message is
+ *  documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET),
+ *  so change them both when you change this. */
+private static String getTMWToACIPErrorString(DuffCode dc) {
+    String glyphDescription = dc.toString(true);
+    return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode " + glyphDescription
+        + " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
+}
+
/**
* Gets the Extended Wylie value for this glyph.
* @param font the font of the TibetanMachineWeb
@@ -1756,6 +1773,17 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
return wylieForGlyph(hashKey);
}
+/** Gets the ACIP for the glyph dc. If dc has no hash key, or no ACIP
+ *  can be derived from that key, sets noSuchACIP[0] to true and returns
+ *  an error-message string describing dc instead. */
+public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
+    String hashKey = getHashKeyForGlyph(dc);
+    String acip = (null != hashKey) ? acipForGlyph(hashKey) : null;
+    if (null != acip) return acip;
+    noSuchACIP[0] = true;
+    return getTMWToACIPErrorString(dc);
+}
+
/** This addresses bug 624133, "Input freezes after impossible
* character". Returns true iff s is a proper prefix of some
* legal input for this keyboard. In the extended Wylie