TMW->ACIP is much improved. V and W were confused, # and * were

confused; many glyphs that should have yielded errors did not. I've added a test case that transforms every TMW glyph (save the one with no TM mapping) to ACIP, and I hand-checked that its output is correct. ACIP->TMW is fixed for # and *: I had never noticed it, but each needed an extra swoosh (U+0F05). Round-trip testing would be good, as would testing real-world use of TMW->ACIP.
2004-04-14 05:44:51 +00:00 · 2004-04-14 05:44:51 +00:00 · 1bfd3772e6
commit 1bfd3772e6
parent 244a9d1370
10 changed files with 1110 additions and 85 deletions
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@ -112,17 +112,28 @@ public class TGCPair implements THDLWylieConstants {
    public String getACIP() {
        return getACIP(null);
    }
-    /** Like {@link #getWylie(String)} but for ACIP transliteration, not EWTS. */
+    /** Like {@link #getWylie(String)} but for ACIP transliteration,
+        not EWTS. */
    public String getACIP(String previousTranslitIfAppendaged) {
        // DLC FIXME: has the EWTS change affected Manipulate.acipToWylie?
        StringBuffer b = new StringBuffer();
        if (consonantWylie != null) {
            String consonantACIP
-                = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
+                = null;
+            if ("w".equals(consonantWylie)
+                && (SANSKRIT_WITHOUT_VOWEL == classification
+                    || SANSKRIT_WITH_VOWEL == classification))
+                consonantACIP = "V";
+            else
+                consonantACIP
+                    = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(consonantWylie);
            if (null == consonantACIP) {
-                return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie);
+                if (null != consonantWylie && consonantWylie.startsWith("R+"))
+                    return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie, " because the ACIP R+... could imply the short superscribed form, but this most likely intends the full form (i.e., Unicode character U+0F6A)");
+                return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + consonantWylie, "");
            } else {
-                // Think of pa'am...  we want 'am, not 'm; 'ang, not 'ng.  But we want 'ur, not 'uar, 'is, not 'ias.
+                // Think of pa'am...  we want 'am, not 'm; 'ang, not
+                // 'ng.  But we want 'ur, not 'uar, 'is, not 'ias.
                if (null != previousTranslitIfAppendaged
                    && "'".equals(previousTranslitIfAppendaged)) {
                    b.append("A");
@ -140,7 +151,7 @@ public class TGCPair implements THDLWylieConstants {
            String vowelACIP
                = org.thdl.tib.text.ttt.ACIPRules.getACIPForEWTS(vowelWylie);
            if (null == vowelACIP) {
-                return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie);
+                return TibetanMachineWeb.getTMWToACIPErrorString("glyph with THDL Extended Wylie " + vowelWylie, "");
            } else {
                b.append(vowelACIP);
            }