Fixed TMW->Wylie for achen. I even tested this by pretending achen could take a da prefix (when in reality it takes no prefixes).

2003-11-23 01:22:27 +00:00 · 2003-11-23 01:22:27 +00:00 · 216c5b0d54
commit 216c5b0d54
parent 37e8dfa917
5 changed files with 51 additions and 14 deletions
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@ -28,7 +28,7 @@ package org.thdl.tib.text;
    when this is U+0F7F alone.

    @author David Chandler */
-public class TGCPair {
+public class TGCPair implements THDLWylieConstants {
    public static final int OTHER = 1;
    // a standalone achen would fall into this category:
    public static final int CONSONANTAL_WITHOUT_VOWEL = 2;
@ -83,6 +83,13 @@ public class TGCPair {
        @see #getACIP(String)
    */
    public String getWylie(String previousTranslitIfAppendaged) {
+        if (ACHEN.equals(consonantWylie)) {
+            // Unlike ACIP, EWTS uses e for achen with e vowel, not ae.
+            if (null == vowelWylie)
+                return ACHEN;
+            else
+                return vowelWylie;
+        }
        StringBuffer b = new StringBuffer();
        if (consonantWylie != null) {
            // Think of pa'am...  we want 'am, not 'm; 'ang, not 'ng.  But we want 'ur, not 'uar, 'is, not 'ias.
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@ -341,7 +341,7 @@ public class TibTextUtils implements THDLWylieConstants {
                throw new InvalidACIPException("Fatal error converting ACIP to TMW.");
        }
        String warningLevel = withWarnings ? "All" : "None";
-        boolean colors = false;
+        boolean colors = withWarnings;
        boolean putWarningsInOutput = false;
        if ("None" != warningLevel) {
            putWarningsInOutput = true;
@ -901,7 +901,7 @@ public class TibTextUtils implements THDLWylieConstants {
        is already "a". */
    private static String aVowelToUseAfter(boolean EWTSNotACIP, String wylie) {
        if (wylie.equals(ACHEN))
-            return "";
+            return ""; // it's a, not aa, for achen alone.
        else
            return (EWTSNotACIP) ? WYLIE_aVOWEL : "A";
    }
@ -1379,6 +1379,18 @@ public class TibTextUtils implements THDLWylieConstants {
                int cls = tp.classification;
                String wylie = tp.getWylie();
                String translit = (EWTSNotACIP) ? wylie : tp.getACIP();
+                if (TibetanMachineWeb.isWylieVowel(wylie) && i > 0) {
+                    // au would be achen with au vowel, so use a.u; ai
+                    // would be achen with ai vowel, so use a.i; l-i
+                    // won't happen, you'd see la-i or gla-i, not l-i
+                    // or gl-i; similarly for r-i, r-I, and l-I.
+
+                    // Even though we only need it for ka.u and ka.i
+                    // and a.u and a.i, we always do it (see Rule 10
+                    // of the September 1, 2003 draft of EWTS
+                    // standard).
+                    translitBuffer.append(WYLIE_DISAMBIGUATING_KEY);
+                }
                translitBuffer.append(translit);
                if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
                    || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
@ -1431,7 +1443,7 @@ public class TibTextUtils implements THDLWylieConstants {

                leftover = 3;
                /* FIXME: these constants are hard-wired here, rather
-                 * than in TibetanMachineWeb, because I'm lazy. */
+                 * than in THDLWylieConstants, because I'm lazy. */
                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
                String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -1002,8 +1002,6 @@ private static boolean isAmbHelper(String y) {
 * @return true if x + y is ambiguous in the Extended Wylie
 * transliteration, false if not */
 public static boolean isAmbiguousWylie(String x, String y) {
-    // DLC NOW: BDE vs. B+DE -- TMW->ACIP should give B+DE to be very friendly to machines.
-
    // What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
    // Some would say it doesn't matter, because that's illegal.  wa
    // doesn't take any prefixes.  But I want even illegal stuff to
@ -1016,7 +1014,7 @@ public static boolean isAmbiguousWylie(String x, String y) {
    // for the regular expressions ^d-, ^m-, ^'-, ^g-, and ^b- shows
    // you all the fellows that could be ambiguous.

-	return (("g".equals(x) && y.startsWith("y") && isAmbHelper(y))
+    return (("g".equals(x) && y.startsWith("y") && isAmbHelper(y))
            || ("g".equals(x) && y.startsWith("w") && isAmbHelper(y))
            || ("d".equals(x) && y.startsWith("w") && isAmbHelper(y))
            || ("d".equals(x) && y.startsWith("z") && isAmbHelper(y))
@ -1025,7 +1023,14 @@ public static boolean isAmbiguousWylie(String x, String y) {
            || ("m".equals(x) && y.startsWith("y") && isAmbHelper(y))
            || ("b".equals(x) && y.startsWith("y") && isAmbHelper(y))
            || ("g".equals(x) && y.startsWith("rw"))
-            || ("d".equals(x) && y.startsWith("rw")));
+            || ("d".equals(x) && y.startsWith("rw"))
+
+            // Because we wouldn't want to generate de for d.e (if
+            // achen took a da prefix): [This is a HIGH-CLASS WORRY
+            // because achen doesn't take any prefixes.  But I'm
+            // thorough when I think to be.]
+            || isWylieVowel(y)
+            );
 }

 /**
@ -2059,3 +2064,6 @@ public static boolean isTopVowel(DuffCode dc) {
        // FIXME: am I missing anything?  tabs etc.?
    }
 }
+
+// FIXME MAKE AUTOMATED TEST: BDE vs. B+DE -- TMW->ACIP should
+// give B+DE to be very friendly to machines.