A preliminary TMW->ACIP converter is here. There are known bugs, mostly with rare punctuation.
This commit is contained in:
parent
cc9ab06864
commit
316f59107b
9 changed files with 278 additions and 88 deletions
|
@ -417,6 +417,8 @@ class ConvertDialog extends JDialog
|
|||
} else { // conversion {to Wylie or TM} mode
|
||||
if (TMW_TO_WYLIE == ct) {
|
||||
newFileNamePrefix = suggested_WYLIE_prefix;
|
||||
} else if (TMW_TO_ACIP == ct) {
|
||||
newFileNamePrefix = suggested_ACIP_prefix;
|
||||
} else if (TMW_TO_UNI == ct || ACIP_TO_UNI == ct) {
|
||||
newFileNamePrefix = suggested_TO_UNI_prefix;
|
||||
} else if (TM_TO_TMW == ct || ACIP_TO_TMW == ct) {
|
||||
|
|
|
@ -78,6 +78,12 @@ public class ConverterGUI implements FontConversion, FontConverterConstants {
|
|||
"Attention required",
|
||||
JOptionPane.ERROR_MESSAGE);
|
||||
return false;
|
||||
} else if (49 == returnCode) {
|
||||
JOptionPane.showMessageDialog(cd,
|
||||
"Though an output file has been created, it contains ugly\nerror messages like\n\"<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP:\n Cannot convert DuffCode...\".\nPlease edit the output by hand to replace all such\ncreatures with the correct ACIP transliteration.",
|
||||
"Attention required",
|
||||
JOptionPane.ERROR_MESSAGE);
|
||||
return false;
|
||||
} else if (43 == returnCode) {
|
||||
JOptionPane.showMessageDialog(cd,
|
||||
"Though an output file has been created, this conversion did nothing.\nDid you choose the correct original file?\nDid you choose the correct type of conversion?",
|
||||
|
|
|
@ -31,6 +31,7 @@ interface FontConverterConstants
|
|||
final String TM_TO_TMW = "TM to TMW";
|
||||
final String TMW_TO_UNI = "TMW to Unicode";
|
||||
final String TMW_TO_WYLIE = "TMW to Wylie";
|
||||
final String TMW_TO_ACIP = "TMW to ACIP";
|
||||
final String TMW_TO_TM = "TMW to TM";
|
||||
final String FIND_SOME_NON_TMW = "Find some non-TMW";
|
||||
final String FIND_SOME_NON_TM = "Find some non-TM";
|
||||
|
@ -43,6 +44,7 @@ interface FontConverterConstants
|
|||
TM_TO_TMW,
|
||||
TMW_TO_UNI,
|
||||
TMW_TO_WYLIE,
|
||||
TMW_TO_ACIP,
|
||||
TMW_TO_TM,
|
||||
FIND_SOME_NON_TMW,
|
||||
FIND_SOME_NON_TM,
|
||||
|
@ -51,6 +53,7 @@ interface FontConverterConstants
|
|||
};
|
||||
|
||||
final String suggested_WYLIE_prefix = "THDL_Wylie_";
|
||||
final String suggested_ACIP_prefix = "ACIP_";
|
||||
final String suggested_TO_TMW_prefix = "TMW_";
|
||||
final String suggested_TO_UNI_prefix = "Uni_";
|
||||
final String suggested_TO_TM_prefix = "TM_";
|
||||
|
|
|
@ -74,6 +74,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
boolean convertACIPToTMWMode = false;
|
||||
boolean convertToTMWMode = false;
|
||||
boolean convertToWylieMode = false;
|
||||
boolean convertToACIPMode = false;
|
||||
boolean findSomeNonTMWMode = false;
|
||||
boolean findAllNonTMWMode = false;
|
||||
boolean findSomeNonTMMode = false;
|
||||
|
@ -98,6 +99,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
= args[0].equals("--to-unicode"))
|
||||
|| (convertToWylieMode
|
||||
= args[0].equals("--to-wylie"))
|
||||
|| (convertToACIPMode
|
||||
= args[0].equals("--to-acip"))
|
||||
|| (findSomeNonTMWMode
|
||||
= args[0].equals("--find-some-non-tmw"))
|
||||
|| (findSomeNonTMMode
|
||||
|
@ -107,7 +110,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
))) {
|
||||
out.println("TibetanConverter [--find-all-non-tmw | --find-some-non-tmw");
|
||||
out.println(" | --to-tibetan-machine | --to-tibetan-machine-web");
|
||||
out.println(" | --to-unicode | --to-wylie] RTF_file");
|
||||
out.println(" | --to-unicode | --to-wylie | --to-acip] RTF_file");
|
||||
out.println(" | TibetanConverter --acip-to-unicode TXT_file");
|
||||
out.println(" | TibetanConverter [--version | -v | --help | -h]");
|
||||
out.println("");
|
||||
|
@ -120,6 +123,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
out.println(" --to-unicode to convert TibetanMachineWeb to Unicode");
|
||||
out.println(" --to-tibetan-machine-web to convert TibetanMachine to TibetanMachineWeb");
|
||||
out.println(" --to-wylie to convert TibetanMachineWeb to THDL Extended Wylie");
|
||||
out.println(" --to-acip to convert TibetanMachineWeb to ACIP");
|
||||
out.println(" --acip-to-unicode to convert ACIP text file to Unicode text file");
|
||||
out.println(" --find-all-non-tmw to locate all characters in the input document that are");
|
||||
out.println(" not in Tibetan Machine Web fonts, exit zero if and only if none found");
|
||||
|
@ -177,6 +181,8 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
} else { // conversion {to Wylie or TM} mode
|
||||
if (convertToWylieMode) {
|
||||
conversionTag = TMW_TO_WYLIE;
|
||||
} else if (convertToACIPMode) {
|
||||
conversionTag = TMW_TO_ACIP;
|
||||
} else if (convertToUnicodeMode) {
|
||||
conversionTag = TMW_TO_UNI;
|
||||
} else if (convertToTMWMode) {
|
||||
|
@ -311,6 +317,7 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
ThdlDebug.verify(((TMW_TO_TM == ct) ? 1 : 0)
|
||||
+ ((TMW_TO_UNI == ct) ? 1 : 0)
|
||||
+ ((TM_TO_TMW == ct) ? 1 : 0)
|
||||
+ ((TMW_TO_ACIP == ct) ? 1 : 0)
|
||||
+ ((TMW_TO_WYLIE == ct) ? 1 : 0)
|
||||
== 1);
|
||||
long numAttemptedReplacements[] = new long[] { 0 };
|
||||
|
@ -321,6 +328,13 @@ public class TibetanConverter implements FontConverterConstants {
|
|||
numAttemptedReplacements)) {
|
||||
exitCode = 44;
|
||||
}
|
||||
} else if (TMW_TO_ACIP == ct) {
|
||||
// Convert to ACIP:
|
||||
if (!tdoc.toACIP(0,
|
||||
tdoc.getLength(),
|
||||
numAttemptedReplacements)) {
|
||||
exitCode = 49;
|
||||
}
|
||||
} else if (TMW_TO_UNI == ct) {
|
||||
StringBuffer errors = new StringBuffer();
|
||||
// Convert to Unicode:
|
||||
|
|
|
@ -44,8 +44,13 @@ public class Manipulate
|
|||
return ch=='a' || ch=='e' || ch=='i' || ch=='o' || ch=='u';
|
||||
}
|
||||
|
||||
/** Returns null on error. */
|
||||
public static String wylieToAcip(String palabra)
|
||||
{
|
||||
// DLC FIXME: for unknown things, return null.
|
||||
if (palabra.equals("@#")) return "*";
|
||||
if (palabra.startsWith("@") || palabra.startsWith("#"))
|
||||
return null; // we can't convert this in isolation! We need context.
|
||||
char []caract;
|
||||
int i, j, len;
|
||||
String nuevaPalabra;
|
||||
|
@ -83,6 +88,12 @@ public class Manipulate
|
|||
nuevaPalabra = replace(nuevaPalabra, "TSH", "TQQ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TS", "TZ");
|
||||
nuevaPalabra = replace(nuevaPalabra, "TQQ", "TS");
|
||||
nuevaPalabra = replace(nuevaPalabra, "a", "'A");
|
||||
nuevaPalabra = replace(nuevaPalabra, "i", "'I");
|
||||
nuevaPalabra = replace(nuevaPalabra, "u", "'U");
|
||||
nuevaPalabra = replace(nuevaPalabra, "-I", "i");
|
||||
nuevaPalabra = replace(nuevaPalabra, "/", ",");
|
||||
nuevaPalabra = replace(nuevaPalabra, "_", " ");
|
||||
nuevaPalabra = fixWazur(nuevaPalabra);
|
||||
return nuevaPalabra;
|
||||
}
|
||||
|
|
|
@ -86,6 +86,30 @@ public class TGCPair {
|
|||
b.append(vowelWylie);
|
||||
return b.toString();
|
||||
}
|
||||
public String getACIP() {
|
||||
// DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
String consonantACIP // DLC FIXME can KAsh occur?
|
||||
= org.thdl.tib.scanner.Manipulate.wylieToAcip(consonantWylie);
|
||||
if (null == consonantACIP) throw new Error("how?");
|
||||
// System.out.println("DLC: Wylie=" + consonantWylie + ", ACIP=" + consonantACIP);
|
||||
// we may have {P-Y}, but the user wants to see {PY}.
|
||||
for (int i = 0; i < consonantACIP.length(); i++) {
|
||||
char ch = consonantACIP.charAt(i);
|
||||
if ('-' != ch)
|
||||
b.append(ch);
|
||||
}
|
||||
}
|
||||
if (vowelWylie != null) {
|
||||
String vowelACIP // DLC FIXME look for exceptions
|
||||
= org.thdl.tib.scanner.Manipulate.wylieToAcip(vowelWylie);
|
||||
// System.out.println("DLC: Wylie=" + vowelWylie + ", ACIP=" + vowelACIP);
|
||||
if (null == vowelACIP) throw new Error("how?");
|
||||
b.append(vowelACIP);
|
||||
}
|
||||
return b.toString();
|
||||
}
|
||||
public int classification;
|
||||
/** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
|
||||
* consonantWylie and vowel vowelWylie. Use
|
||||
|
|
|
@ -786,37 +786,50 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
private static final boolean makeIllegalTibetanGoEndToEnd = true;
|
||||
|
||||
|
||||
/** Returns "a", unless wylie is already "a". */
|
||||
private static String aVowelToUseAfter(String wylie) {
|
||||
/** Returns "a"/"A", unless wylie (which really is EWTS, not ACIP)
|
||||
is already "a". */
|
||||
private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
|
||||
if (wylie.equals(ACHEN))
|
||||
return "";
|
||||
else
|
||||
return WYLIE_aVOWEL;
|
||||
return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
|
||||
}
|
||||
|
||||
private static String unambiguousPostAVowelWylie(String wylie1,
|
||||
String wylie2) {
|
||||
private static String unambiguousPostAVowelTranslit(boolean EWTSNotACIP,
|
||||
String wylie1,
|
||||
String wylie2,
|
||||
String acip1,
|
||||
String acip2) {
|
||||
String disambiguator = "";
|
||||
// type "lard" vs. "lar.d", and you'll see the need for this
|
||||
// disambiguation of suffix and postsuffix. sa doesn't take
|
||||
// any head letters, so only da needs to be considered.
|
||||
if (TibetanMachineWeb.isWylieTop(wylie1)
|
||||
&& wylie2.equals(/* FIXME: hard-coded */ "d"))
|
||||
disambiguator = WYLIE_DISAMBIGUATING_KEY_STRING;
|
||||
return wylie1 + disambiguator + wylie2;
|
||||
disambiguator = (EWTSNotACIP) ? WYLIE_DISAMBIGUATING_KEY_STRING : "-";
|
||||
if (EWTSNotACIP)
|
||||
return wylie1 + disambiguator + wylie2;
|
||||
else
|
||||
return acip1 + disambiguator + acip2;
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Extended Wylie for a sequence of glyphs.
|
||||
* Gets the Extended Wylie for the given sequence of glyphs if
|
||||
* EWTSNotACIP is true, or the ACIP otherwise.
|
||||
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
|
||||
* you want ACIP
|
||||
* @param dcs an array of glyphs
|
||||
* @param noSuchWylie an array which will not be touched if this is
|
||||
* successful; however, if there is no THDL Extended Wylie
|
||||
* corresponding to these glyphs, then noSuchWylie[0] will be set to
|
||||
* true
|
||||
* @return the Extended Wylie corresponding to these glyphs, or null */
|
||||
public static String getWylie(DuffCode[] dcs, boolean noSuchWylie[]) {
|
||||
* @param noSuch an array which will not be touched if this is
|
||||
* successful; however, if there is no THDL Extended Wylie/ACIP
|
||||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
|
||||
* null */
|
||||
public static String getTranslit(boolean EWTSNotACIP,
|
||||
DuffCode[] dcs,
|
||||
boolean noSuch[]) {
|
||||
StringBuffer warnings = (debug ? new StringBuffer() : null);
|
||||
String ans = getWylieImplementation(dcs, noSuchWylie, warnings);
|
||||
String ans
|
||||
= getTranslitImplementation(EWTSNotACIP, dcs, noSuch, warnings);
|
||||
if (debug && warnings.length() > 0)
|
||||
System.out.println("DEBUG: warnings in TMW->Wylie: " + warnings);
|
||||
return ans;
|
||||
|
@ -1172,13 +1185,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
return candidateType;
|
||||
}
|
||||
|
||||
/** Appends to wylieBuffer the wylie for the glyph list glyphList
|
||||
(which should be an ArrayList for speed). This will be very
|
||||
user-friendly for "legal tsheg bars" and will be valid, but
|
||||
possibly ugly (interspersed with disambiguators or extra
|
||||
vowels, etc.) Wylie for other things, such as Sanskrit
|
||||
transliteration. Updates warnings and noSuchWylie like the
|
||||
caller does.
|
||||
/** Appends to translitBuffer the EWTS/ACIP for the glyph list
|
||||
glyphList (which should be an ArrayList for speed). This will
|
||||
be very user-friendly for "legal tsheg bars" and will be
|
||||
valid, but possibly ugly (interspersed with disambiguators or
|
||||
extra vowels, etc.) Wylie/ACIP for other things, such as
|
||||
Sanskrit transliteration. Updates warnings and noSuch like
|
||||
the caller does.
|
||||
|
||||
<p>What constitutes a legal, non-punctuation, non-whitespace
|
||||
tsheg bar? The following are the only such:</p>
|
||||
|
@ -1219,22 +1232,23 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
<p>When there are three unadorned consonant stacks in a
|
||||
syllable, a hard-coded list of valid Tibetan tsheg bars is
|
||||
relied upon to determine if the 'a' vowel comes after the
|
||||
first or the second consonant.</p> */
|
||||
private static void getTshegBarWylie(java.util.List glyphList,
|
||||
boolean noSuchWylie[],
|
||||
StringBuffer warnings,
|
||||
StringBuffer wylieBuffer) {
|
||||
relied upon to determine if the 'a'/'A' vowel comes after
|
||||
the first or the second consonant.</p> */
|
||||
private static void getTshegBarTranslit(boolean EWTSNotACIP,
|
||||
java.util.List glyphList,
|
||||
boolean noSuch[],
|
||||
StringBuffer warnings,
|
||||
StringBuffer translitBuffer) {
|
||||
TGCList gcs
|
||||
= breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie);
|
||||
= breakTshegBarIntoGraphemeClusters(glyphList, noSuch);
|
||||
String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
|
||||
int sz = gcs.size();
|
||||
if (candidateType == "invalid"
|
||||
|| candidateType == "single-sanskrit-gc") {
|
||||
// Forget beauty and succinctness -- just be sure to
|
||||
// generate Wylie that can be converted unambiguously into
|
||||
// Tibetan. Use a disambiguator or vowel after each
|
||||
// grapheme cluster.
|
||||
// generate transliteration that can be converted
|
||||
// unambiguously into Tibetan. Use a disambiguator or
|
||||
// vowel after each grapheme cluster.
|
||||
//
|
||||
// If we truly didn't care about beauty, we'd just lump
|
||||
// SANSKRIT_WITHOUT_VOWEL and SANSKRIT_WITH_VOWEL into
|
||||
|
@ -1244,19 +1258,20 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
TGCPair tp = (TGCPair)gcs.get(i);
|
||||
int cls = tp.classification;
|
||||
String wylie = tp.getWylie();
|
||||
wylieBuffer.append(wylie);
|
||||
String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
|
||||
translitBuffer.append(translit);
|
||||
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|
||||
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
|
||||
wylieBuffer.append(aVowelToUseAfter(wylie));
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie));
|
||||
} else {
|
||||
if (TGCPair.CONSONANTAL_WITH_VOWEL != cls
|
||||
&& TGCPair.SANSKRIT_WITH_VOWEL != cls)
|
||||
wylieBuffer.append(WYLIE_DISAMBIGUATING_KEY);
|
||||
translitBuffer.append(EWTSNotACIP ? WYLIE_DISAMBIGUATING_KEY : '-');
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Generate perfect, beautiful, Wylie, using the minimum
|
||||
// number of vowels and disambiguators.
|
||||
// Generate perfect, beautiful transliteration, using the
|
||||
// minimum number of vowels and disambiguators.
|
||||
|
||||
int leftover = sz + 1;
|
||||
|
||||
|
@ -1299,23 +1314,44 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
||||
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
|
||||
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
|
||||
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
|
||||
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
|
||||
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
|
||||
if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
|
||||
|| (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
|
||||
|| (wylie1.equals("b") && wylie2.equals("d"))
|
||||
|| (wylie1.equals("m") && wylie2.equals("d"))
|
||||
|| (wylie1.equals("'") && (wylie2.equals("g") || wylie2.equals("d") || wylie2.equals("b")))) {
|
||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
||||
wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
||||
else
|
||||
translitBuffer.append(acip1 + '-' + acip2);
|
||||
else
|
||||
wylieBuffer.append(wylie1 + wylie2);
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + wylie2);
|
||||
else
|
||||
translitBuffer.append(acip1 + acip2);
|
||||
|
||||
wylieBuffer.append(aVowelToUseAfter(wylie2)
|
||||
+ wylie3);
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2)
|
||||
+ (EWTSNotACIP ? wylie3 : acip3));
|
||||
} else {
|
||||
wylieBuffer.append(wylie1
|
||||
+ aVowelToUseAfter(wylie1)
|
||||
+ unambiguousPostAVowelWylie(wylie2,
|
||||
wylie3));
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1
|
||||
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
|
||||
+ unambiguousPostAVowelTranslit(EWTSNotACIP,
|
||||
wylie2,
|
||||
wylie3,
|
||||
acip2,
|
||||
acip3));
|
||||
else
|
||||
translitBuffer.append(acip1
|
||||
+ aVowelToUseAfter(EWTSNotACIP, wylie1)
|
||||
+ unambiguousPostAVowelTranslit(EWTSNotACIP,
|
||||
wylie2,
|
||||
wylie3,
|
||||
acip2,
|
||||
acip3));
|
||||
}
|
||||
} else if ("root" == candidateType
|
||||
|| "prefix/root-root/suffix" == candidateType
|
||||
|
@ -1323,13 +1359,14 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|| "root-suffix-postsuffix" == candidateType
|
||||
|| "root-suffix" == candidateType) {
|
||||
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
||||
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
|
||||
leftover = 1;
|
||||
wylieBuffer.append(wylie1);
|
||||
translitBuffer.append((EWTSNotACIP) ? wylie1 : acip1);
|
||||
if (((TGCPair)gcs.get(0)).classification
|
||||
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
||||
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||
== ((TGCPair)gcs.get(0)).classification);
|
||||
wylieBuffer.append(aVowelToUseAfter(wylie1));
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie1));
|
||||
if (debug) System.out.println("DEBUG: appending vowel");
|
||||
} else {
|
||||
if (debug) System.out.println("DEBUG: already has vowel 2");
|
||||
|
@ -1338,26 +1375,39 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
leftover = 3;
|
||||
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
|
||||
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
|
||||
wylieBuffer.append(unambiguousPostAVowelWylie(wylie2,
|
||||
wylie3));
|
||||
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
|
||||
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
|
||||
translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
|
||||
wylie2,
|
||||
wylie3,
|
||||
acip2,
|
||||
acip3));
|
||||
}
|
||||
} else if ("prefix-root-suffix" == candidateType
|
||||
|| "prefix-root" == candidateType
|
||||
|| "prefix-root-suffix-postsuffix" == candidateType) {
|
||||
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
||||
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
|
||||
String acip1 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(0)).getACIP();
|
||||
String acip2 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(1)).getACIP();
|
||||
leftover = 2;
|
||||
if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
|
||||
wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
|
||||
else
|
||||
translitBuffer.append(acip1 + '-' + acip2);
|
||||
else
|
||||
wylieBuffer.append(wylie1 + wylie2);
|
||||
if (EWTSNotACIP)
|
||||
translitBuffer.append(wylie1 + wylie2);
|
||||
else
|
||||
translitBuffer.append(acip1 + acip2);
|
||||
|
||||
if (((TGCPair)gcs.get(1)).classification
|
||||
!= TGCPair.CONSONANTAL_WITH_VOWEL) {
|
||||
ThdlDebug.verify(TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||
== ((TGCPair)gcs.get(1)).classification);
|
||||
if (debug) System.out.println("DEBUG: appending vowel");
|
||||
wylieBuffer.append(aVowelToUseAfter(wylie2));
|
||||
translitBuffer.append(aVowelToUseAfter(EWTSNotACIP, wylie2));
|
||||
} else {
|
||||
if (debug) System.out.println("DEBUG: already has vowel 1");
|
||||
}
|
||||
|
@ -1365,8 +1415,13 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
leftover = 4;
|
||||
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
|
||||
String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
|
||||
wylieBuffer.append(unambiguousPostAVowelWylie(wylie3,
|
||||
wylie4));
|
||||
String acip3 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(2)).getACIP();
|
||||
String acip4 = (EWTSNotACIP) ? null : ((TGCPair)gcs.get(3)).getACIP();
|
||||
translitBuffer.append(unambiguousPostAVowelTranslit(EWTSNotACIP,
|
||||
wylie3,
|
||||
wylie4,
|
||||
acip3,
|
||||
acip4));
|
||||
}
|
||||
} else if ("number" == candidateType) {
|
||||
leftover = 0;
|
||||
|
@ -1374,18 +1429,17 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
throw new Error("missed a case down here");
|
||||
}
|
||||
|
||||
// append the wylie left over:
|
||||
// append the wylie/ACIP left over:
|
||||
for (int i = leftover; i < sz; i++) {
|
||||
TGCPair tp = (TGCPair)gcs.get(i);
|
||||
String wylie = tp.getWylie();
|
||||
wylieBuffer.append(wylie);
|
||||
translitBuffer.append(EWTSNotACIP ? tp.getWylie() : tp.getACIP());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Extended Wylie for a sequence of glyphs. This works as
|
||||
* follows:
|
||||
* Gets the Extended Wylie/ACIP for a sequence of glyphs. This works
|
||||
* as follows:
|
||||
*
|
||||
* <p>We run along until we hit whitespace or punctuation. We take
|
||||
* everything before that and we see if it's a legal Tibetan tsheg bar,
|
||||
|
@ -1393,22 +1447,25 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
* vowel in the correct place. If not, then we throw a disambiguating
|
||||
* key or a vowel after each stack.
|
||||
*
|
||||
* @param EWTSNotACIP true if you want THDL Extended Wylie, false if
|
||||
* you want ACIP
|
||||
* @param dcs an array of glyphs
|
||||
* @param noSuchWylie an array which will not be touched if this is
|
||||
* successful; however, if there is no THDL Extended Wylie
|
||||
* corresponding to these glyphs, then noSuchWylie[0] will be set to
|
||||
* true
|
||||
* @param noSuch an array which will not be touched if this is
|
||||
* successful; however, if there is no THDL Extended Wylie/ACIP
|
||||
* corresponding to these glyphs, then noSuch[0] will be set to true
|
||||
* @param warnings either null or a buffer to which will be appended
|
||||
* warnings about illegal tsheg bars
|
||||
* @return the Extended Wylie corresponding to these glyphs, or null */
|
||||
public static String getWylieImplementation(DuffCode[] dcs,
|
||||
boolean noSuchWylie[],
|
||||
StringBuffer warnings) {
|
||||
* @return the Extended Wylie/ACIP corresponding to these glyphs, or
|
||||
* null */
|
||||
private static String getTranslitImplementation(boolean EWTSNotACIP,
|
||||
DuffCode[] dcs,
|
||||
boolean noSuch[],
|
||||
StringBuffer warnings) {
|
||||
if (dcs.length == 0)
|
||||
return null;
|
||||
|
||||
ArrayList glyphList = new ArrayList();
|
||||
StringBuffer wylieBuffer = new StringBuffer();
|
||||
StringBuffer translitBuffer = new StringBuffer();
|
||||
|
||||
for (int i=0; i<dcs.length; i++) {
|
||||
char ch = dcs[i].getCharacter();
|
||||
|
@ -1417,41 +1474,43 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
if (k < 32) {
|
||||
if (!glyphList.isEmpty()) {
|
||||
getTshegBarWylie(glyphList, noSuchWylie,
|
||||
warnings, wylieBuffer);
|
||||
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
|
||||
warnings, translitBuffer);
|
||||
glyphList.clear();
|
||||
if (null != warnings)
|
||||
warnings.append("Some glyphs came right before a newline; they did not have a tsheg or shad come first.");
|
||||
}
|
||||
|
||||
wylieBuffer.append(ch);
|
||||
translitBuffer.append(ch);
|
||||
} else {
|
||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuchWylie);
|
||||
String wylie = TibetanMachineWeb.getWylieForGlyph(dcs[i], noSuch);
|
||||
String acip = EWTSNotACIP ? null : TibetanMachineWeb.getACIPForGlyph(dcs[i], noSuch);
|
||||
if (TibetanMachineWeb.isWyliePunc(wylie)
|
||||
&& !TibetanMachineWeb.isWylieAdornment(wylie)) {
|
||||
if (!glyphList.isEmpty()) {
|
||||
getTshegBarWylie(glyphList, noSuchWylie,
|
||||
warnings, wylieBuffer);
|
||||
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
|
||||
warnings, translitBuffer);
|
||||
glyphList.clear();
|
||||
}
|
||||
wylieBuffer.append(wylie); //append the punctuation
|
||||
translitBuffer.append(EWTSNotACIP ? wylie : acip); //append the punctuation
|
||||
} else {
|
||||
glyphList.add(dcs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// replace remaining TMW with Wylie
|
||||
// replace remaining TMW with transliteration
|
||||
|
||||
if (!glyphList.isEmpty()) {
|
||||
getTshegBarWylie(glyphList, noSuchWylie, warnings, wylieBuffer);
|
||||
getTshegBarTranslit(EWTSNotACIP, glyphList, noSuch,
|
||||
warnings, translitBuffer);
|
||||
// glyphList.clear() if we weren't about to exit...
|
||||
if (null != warnings)
|
||||
warnings.append("The stretch of Tibetan ended without final punctuation.");
|
||||
}
|
||||
|
||||
if (wylieBuffer.length() > 0)
|
||||
return wylieBuffer.toString();
|
||||
if (translitBuffer.length() > 0)
|
||||
return translitBuffer.toString();
|
||||
else
|
||||
return null;
|
||||
}
|
||||
|
|
|
@ -294,6 +294,18 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
return getWylie(0, getLength(), noSuchWylie);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts the entire document into ACIP. If the document consists of
|
||||
* both Tibetan and non-Tibetan fonts, however, the conversion stops at
|
||||
* the first non-Tibetan font.
|
||||
* @param noSuchACIP an array which will not be touched if this is
|
||||
* successful; however, if there is no ACIP corresponding to one of
|
||||
* these glyphs, then noSuchACIP[0] will be set to true
|
||||
* @return the string of ACIP corresponding to this document */
|
||||
public String getACIP(boolean noSuchACIP[]) {
|
||||
return getACIP(0, getLength(), noSuchACIP);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a portion of the document into Extended Wylie.
|
||||
* If the document consists of both Tibetan and
|
||||
|
@ -306,7 +318,25 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
* corresponding to one of these glyphs, then noSuchWylie[0] will be
|
||||
* set to true
|
||||
* @return the string of Wylie corresponding to this document */
|
||||
public String getWylie(int begin, int end, boolean noSuchWylie[]) {
|
||||
public String getWylie(int begin, int end, boolean noSuchWylie[]) {
|
||||
return getTranslit(true, begin, end, noSuchWylie);
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a portion of the document into ACIP. If the document
|
||||
* consists of both Tibetan and non-Tibetan fonts, however, the
|
||||
* conversion stops at the first non-Tibetan font.
|
||||
* @param begin the beginning of the region to convert
|
||||
* @param end the end of the region to convert
|
||||
* @param noSuchWylie an array which will not be touched if this is
|
||||
* successful; however, if there is no ACIP corresponding to one of
|
||||
* these glyphs, then noSuchACIP[0] will be set to true
|
||||
* @return the string of ACIP corresponding to this document */
|
||||
public String getACIP(int begin, int end, boolean noSuchACIP[]) {
|
||||
return getTranslit(true, begin, end, noSuchACIP);
|
||||
}
|
||||
|
||||
private String getTranslit(boolean EWTSNotACIP, int begin, int end, boolean noSuch[]) {
|
||||
AttributeSet attr;
|
||||
String fontName;
|
||||
int fontNum;
|
||||
|
@ -318,7 +348,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
|
||||
java.util.List dcs = new ArrayList();
|
||||
int i = begin;
|
||||
StringBuffer wylieBuffer = new StringBuffer();
|
||||
StringBuffer translitBuffer = new StringBuffer();
|
||||
|
||||
try {
|
||||
while (i < end) {
|
||||
|
@ -332,10 +362,10 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
if (dcs.size() > 0) {
|
||||
DuffCode[] dc_array = new DuffCode[0];
|
||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
||||
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
dcs.clear();
|
||||
}
|
||||
wylieBuffer.append(ch);
|
||||
translitBuffer.append(ch);
|
||||
}
|
||||
|
||||
//current character isn't TMW
|
||||
|
@ -343,7 +373,7 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
if (dcs.size() > 0) {
|
||||
DuffCode[] dc_array = new DuffCode[0];
|
||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
||||
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
dcs.clear();
|
||||
}
|
||||
}
|
||||
|
@ -358,9 +388,9 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
if (dcs.size() > 0) {
|
||||
DuffCode[] dc_array = new DuffCode[0];
|
||||
dc_array = (DuffCode[])dcs.toArray(dc_array);
|
||||
wylieBuffer.append(TibTextUtils.getWylie(dc_array, noSuchWylie));
|
||||
translitBuffer.append(TibTextUtils.getTranslit(EWTSNotACIP, dc_array, noSuch));
|
||||
}
|
||||
return wylieBuffer.toString();
|
||||
return translitBuffer.toString();
|
||||
}
|
||||
catch (BadLocationException ble) {
|
||||
ble.printStackTrace();
|
||||
|
@ -1101,6 +1131,17 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
* DuffCode..." text into the document */
|
||||
public boolean toWylie(int start, int end,
|
||||
long numAttemptedReplacements[]) {
|
||||
return toTranslit(true, start, end, numAttemptedReplacements);
|
||||
}
|
||||
|
||||
// DLC DOC just like {@link #toWylie(int,int,long[])}
|
||||
public boolean toACIP(int start, int end,
|
||||
long numAttemptedReplacements[]) {
|
||||
return toTranslit(false, start, end, numAttemptedReplacements);
|
||||
}
|
||||
|
||||
private boolean toTranslit(boolean EWTSNotACIP, int start, int end,
|
||||
long numAttemptedReplacements[]) {
|
||||
if (start >= end)
|
||||
return true;
|
||||
|
||||
|
@ -1124,7 +1165,9 @@ public class TibetanDocument extends DefaultStyledDocument {
|
|||
remove(start, i-start);
|
||||
ThdlDebug.verify(getRomanAttributeSet() != null);
|
||||
insertString(start,
|
||||
TibTextUtils.getWylie(dc_array, noSuchWylie),
|
||||
TibTextUtils.getTranslit(EWTSNotACIP,
|
||||
dc_array,
|
||||
noSuchWylie),
|
||||
getRomanAttributeSet());
|
||||
dcs.clear();
|
||||
}
|
||||
|
|
|
@ -1706,6 +1706,13 @@ public static String wylieForGlyph(String hashKey) {
|
|||
return sb.toString();
|
||||
}
|
||||
|
||||
// DLC DOC
|
||||
private static String acipForGlyph(String hashKey) {
|
||||
String ACIP // DLC FIXME: test this.
|
||||
= org.thdl.tib.scanner.Manipulate.wylieToAcip(hashKey);
|
||||
return ACIP;
|
||||
}
|
||||
|
||||
/** Error that appears in a document when some TMW cannot be
|
||||
* transcribed in THDL Extended Wylie. This error message is
|
||||
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html, so change
|
||||
|
@ -1716,6 +1723,16 @@ private static String getTMWToWylieErrorString(DuffCode dc) {
|
|||
+ " to THDL Extended Wylie. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
|
||||
}
|
||||
|
||||
/** Error that appears in a document when some TMW cannot be
|
||||
* transcribed in ACIP. This error message is
|
||||
* documented in www/htdocs/TMW_RTF_TO_THDL_WYLIE.html (DLC NOT YET), so change
|
||||
* them both when you change this. */
|
||||
private static String getTMWToACIPErrorString(DuffCode dc) {
|
||||
return "<<[[JSKAD_TMW_TO_ACIP_ERROR_NO_SUCH_ACIP: Cannot convert DuffCode "
|
||||
+ dc.toString(true)
|
||||
+ " to ACIP. Please see the documentation for the TMW font and transcribe this yourself.]]>>";
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the Extended Wylie value for this glyph.
|
||||
* @param font the font of the TibetanMachineWeb
|
||||
|
@ -1756,6 +1773,17 @@ public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
|
|||
return wylieForGlyph(hashKey);
|
||||
}
|
||||
|
||||
// DLC DOC
|
||||
public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
|
||||
String hashKey = getHashKeyForGlyph(dc);
|
||||
String ans = (hashKey == null) ? null : acipForGlyph(hashKey);
|
||||
if (hashKey == null || ans == null) {
|
||||
noSuchACIP[0] = true;
|
||||
return getTMWToACIPErrorString(dc);
|
||||
}
|
||||
return ans;
|
||||
}
|
||||
|
||||
/** This addresses bug 624133, "Input freezes after impossible
|
||||
* character". Returns true iff s is a proper prefix of some
|
||||
* legal input for this keyboard. In the extended Wylie
|
||||
|
|
Loading…
Reference in a new issue