Fixed TMW->Wylie for achen. I even tested this by pretending achen could take a da prefix (when in reality it takes no prefixes).

This commit is contained in:
dchandler 2003-11-23 01:22:27 +00:00
parent 37e8dfa917
commit 216c5b0d54
5 changed files with 51 additions and 14 deletions

View file

@ -28,7 +28,7 @@ package org.thdl.tib.text;
when this is U+0F7F alone.
@author David Chandler */
public class TGCPair {
public class TGCPair implements THDLWylieConstants {
public static final int OTHER = 1;
// a standalone achen would fall into this category:
public static final int CONSONANTAL_WITHOUT_VOWEL = 2;
@ -83,6 +83,13 @@ public class TGCPair {
@see #getACIP(String)
*/
public String getWylie(String previousTranslitIfAppendaged) {
if (ACHEN.equals(consonantWylie)) {
// Unlike ACIP, EWTS uses e for achen with e vowel, not ae.
if (null == vowelWylie)
return ACHEN;
else
return vowelWylie;
}
StringBuffer b = new StringBuffer();
if (consonantWylie != null) {
// Think of pa'am... we want 'am, not 'm; 'ang, not 'ng. But we want 'ur, not 'uar, 'is, not 'ias.

View file

@ -341,7 +341,7 @@ public class TibTextUtils implements THDLWylieConstants {
throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
}
String warningLevel = withWarnings ? "All" : "None";
boolean colors = false;
boolean colors = withWarnings;
boolean putWarningsInOutput = false;
if ("None" != warningLevel) {
putWarningsInOutput = true;
@ -901,7 +901,7 @@ public class TibTextUtils implements THDLWylieConstants {
is already "a". */
private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
if (wylie.equals(ACHEN))
return "";
return ""; // it's a, not aa, for achen alone.
else
return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
}
@ -1379,6 +1379,18 @@ public class TibTextUtils implements THDLWylieConstants {
int cls = tp.classification;
String wylie = tp.getWylie();
String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
if (TibetanMachineWeb.isWylieVowel(wylie) && i > 0) {
// au would be achen with au vowel, so use a.u; ai
// would be achen with ai vowel, so use a.i; l-i
// won't happen, you'd see la-i or gla-i, not l-i
// or gl-i; similarly for r-i, r-I, and l-I.
// Even though we only need it for ka.u and ka.i
// and a.u and a.i, we always do it (see Rule 10
// of the September 1, 2003 draft of EWTS
// standard).
translitBuffer.append(WYLIE_DISAMBIGUATING_KEY);
}
translitBuffer.append(translit);
if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
|| TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
@ -1431,7 +1443,7 @@ public class TibTextUtils implements THDLWylieConstants {
leftover = 3;
/* FIXME: these constants are hard-wired here, rather
* than in TibetanMachineWeb, because I'm lazy. */
* than in THDLWylieConstants, because I'm lazy. */
String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
String wylie3 = ((TGCPair)gcs.get(2)).getWylie();

View file

@ -1002,8 +1002,6 @@ private static boolean isAmbHelper(String y) {
* @return true if x + y is ambiguous in the Extended Wylie
* transliteration, false if not */
public static boolean isAmbiguousWylie(String x, String y) {
// DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines.
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
// Some would say it doesn't matter, because that's illegal. wa
// doesn't take any prefixes. But I want even illegal stuff to
@ -1016,7 +1014,7 @@ public static boolean isAmbiguousWylie(String x, String y) {
// for the regular expressions ^d-, ^m-, ^'-, ^g-, and ^b- shows
// you all the fellows that could be ambiguous.
return (("g".equals(x) && y.startsWith("y") && isAmbHelper(y))
return (("g".equals(x) && y.startsWith("y") && isAmbHelper(y))
|| ("g".equals(x) && y.startsWith("w") && isAmbHelper(y))
|| ("d".equals(x) && y.startsWith("w") && isAmbHelper(y))
|| ("d".equals(x) && y.startsWith("z") && isAmbHelper(y))
@ -1025,7 +1023,14 @@ public static boolean isAmbiguousWylie(String x, String y) {
|| ("m".equals(x) && y.startsWith("y") && isAmbHelper(y))
|| ("b".equals(x) && y.startsWith("y") && isAmbHelper(y))
|| ("g".equals(x) && y.startsWith("rw"))
|| ("d".equals(x) && y.startsWith("rw")));
|| ("d".equals(x) && y.startsWith("rw"))
// Because we wouldn't want to generate de for d.e (if
// achen took a da prefix): [This is a HIGH-CLASS WORRY
// because achen doesn't take any prefixes. But I'm
// thorough when I think to be.]
|| isWylieVowel(y)
);
}
/**
@ -2059,3 +2064,6 @@ public static boolean isTopVowel(DuffCode dc) {
// FIXME: am I missing anything? tabs etc.?
}
}
// FIXME MAKE AUTOMATED TEST: BDE vs. B+DE -- TMW->ACIP should
// give B+DE to be very friendly to machines.