Fixed TMW->Wylie for achen. I even tested this by pretending achen could take a da prefix (when in reality it takes no prefixes).
This commit is contained in:
parent
37e8dfa917
commit
216c5b0d54
5 changed files with 51 additions and 14 deletions
|
@ -28,7 +28,7 @@ package org.thdl.tib.text;
|
|||
when this is U+0F7F alone.
|
||||
|
||||
@author David Chandler */
|
||||
public class TGCPair {
|
||||
public class TGCPair implements THDLWylieConstants {
|
||||
public static final int OTHER = 1;
|
||||
// a standalone achen would fall into this category:
|
||||
public static final int CONSONANTAL_WITHOUT_VOWEL = 2;
|
||||
|
@ -83,6 +83,13 @@ public class TGCPair {
|
|||
@see #getACIP(String)
|
||||
*/
|
||||
public String getWylie(String previousTranslitIfAppendaged) {
|
||||
if (ACHEN.equals(consonantWylie)) {
|
||||
// Unlike ACIP, EWTS uses e for achen with e vowel, not ae.
|
||||
if (null == vowelWylie)
|
||||
return ACHEN;
|
||||
else
|
||||
return vowelWylie;
|
||||
}
|
||||
StringBuffer b = new StringBuffer();
|
||||
if (consonantWylie != null) {
|
||||
// Think of pa'am... we want 'am, not 'm; 'ang, not 'ng. But we want 'ur, not 'uar, 'is, not 'ias.
|
||||
|
|
|
@ -341,7 +341,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
|
||||
}
|
||||
String warningLevel = withWarnings ? "All" : "None";
|
||||
boolean colors = false;
|
||||
boolean colors = withWarnings;
|
||||
boolean putWarningsInOutput = false;
|
||||
if ("None" != warningLevel) {
|
||||
putWarningsInOutput = true;
|
||||
|
@ -901,7 +901,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
is already "a". */
|
||||
private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
|
||||
if (wylie.equals(ACHEN))
|
||||
return "";
|
||||
return ""; // it's a, not aa, for achen alone.
|
||||
else
|
||||
return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
|
||||
}
|
||||
|
@ -1379,6 +1379,18 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
int cls = tp.classification;
|
||||
String wylie = tp.getWylie();
|
||||
String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
|
||||
if (TibetanMachineWeb.isWylieVowel(wylie) && i > 0) {
|
||||
// au would be achen with au vowel, so use a.u; ai
|
||||
// would be achen with ai vowel, so use a.i; l-i
|
||||
// won't happen, you'd see la-i or gla-i, not l-i
|
||||
// or gl-i; similarly for r-i, r-I, and l-I.
|
||||
|
||||
// Even though we only need it for ka.u and ka.i
|
||||
// and a.u and a.i, we always do it (see Rule 10
|
||||
// of the September 1, 2003 draft of EWTS
|
||||
// standard).
|
||||
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY);
|
||||
}
|
||||
translitBuffer.append(translit);
|
||||
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|
||||
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
|
||||
|
@ -1431,7 +1443,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
|||
|
||||
leftover = 3;
|
||||
/* FIXME: these constants are hard-wired here, rather
|
||||
* than in TibetanMachineWeb, because I'm lazy. */
|
||||
* than in THDLWylieConstants, because I'm lazy. */
|
||||
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
|
||||
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
|
||||
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
|
||||
|
|
|
@ -1002,8 +1002,6 @@ private static boolean isAmbHelper(String y) {
|
|||
* @return true if x + y is ambiguous in the Extended Wylie
|
||||
* transliteration, false if not */
|
||||
public static boolean isAmbiguousWylie(String x, String y) {
|
||||
// DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines.
|
||||
|
||||
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
|
||||
// Some would say it doesn't matter, because that's illegal. wa
|
||||
// doesn't take any prefixes. But I want even illegal stuff to
|
||||
|
@ -1016,7 +1014,7 @@ public static boolean isAmbiguousWylie(String x, String y) {
|
|||
// for the regular expressions ^d-, ^m-, ^'-, ^g-, and ^b- shows
|
||||
// you all the fellows that could be ambiguous.
|
||||
|
||||
return (("g".equals(x) && y.startsWith("y") && isAmbHelper(y))
|
||||
return (("g".equals(x) && y.startsWith("y") && isAmbHelper(y))
|
||||
|| ("g".equals(x) && y.startsWith("w") && isAmbHelper(y))
|
||||
|| ("d".equals(x) && y.startsWith("w") && isAmbHelper(y))
|
||||
|| ("d".equals(x) && y.startsWith("z") && isAmbHelper(y))
|
||||
|
@ -1025,7 +1023,14 @@ public static boolean isAmbiguousWylie(String x, String y) {
|
|||
|| ("m".equals(x) && y.startsWith("y") && isAmbHelper(y))
|
||||
|| ("b".equals(x) && y.startsWith("y") && isAmbHelper(y))
|
||||
|| ("g".equals(x) && y.startsWith("rw"))
|
||||
|| ("d".equals(x) && y.startsWith("rw")));
|
||||
|| ("d".equals(x) && y.startsWith("rw"))
|
||||
|
||||
// Because we wouldn't want to generate de for d.e (if
|
||||
// achen took a da prefix): [This is a HIGH-CLASS WORRY
|
||||
// because achen doesn't take any prefixes. But I'm
|
||||
// thorough when I think to be.]
|
||||
|| isWylieVowel(y)
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -2059,3 +2064,6 @@ public static boolean isTopVowel(DuffCode dc) {
|
|||
// FIXME: am I missing anything? tabs etc.?
|
||||
}
|
||||
}
|
||||
|
||||
// FIXME MAKE AUTOMATED TEST: BDE vs. B+DE -- TMW->ACIP should
|
||||
// give B+DE to be very friendly to machines.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue