TMW->ACIP is much improved. V and W were confused, # and * were

confused; many glyphs that should have yielded errors did not. I've added a test case that transforms every TMW glyph (save the one that has no TM mapping) to ACIP, and I hand-checked that its output was correct. ACIP->TMW is fixed for # and *. I had never noticed it, but each needed an extra swoosh (U+0F05). Testing round-tripping would be good, as would testing real-world use of TMW->ACIP.
2004-04-14 05:44:51 +00:00 · 2004-04-14 05:44:51 +00:00 · 1bfd3772e6
commit 1bfd3772e6
parent 244a9d1370
10 changed files with 1110 additions and 85 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@ -628,9 +628,16 @@ public class ACIPConverter {
                                        tdocLocation[0] += s.getText().length();
                                        continue; // FIXME: this means the unicode above doesn't go into the output if null != writer && null != tdoc?
                                    } else {
-                                        String wy = ACIPRules.getWylieForACIPOther(s.getText());
-                                        if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
-                                        duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
+                                        if ("#".equals(s.getText())) { // hard-coded ACIP value
+                                            duff = new Object[] {
+                                                TibetanMachineWeb.getGlyph("@#"),
+                                                TibetanMachineWeb.getGlyph("#")
+                                            }; // hard-coded EWTS values
+                                        } else {
+                                            String wy = ACIPRules.getWylieForACIPOther(s.getText());
+                                            if (null == wy) throw new Error("No wylie for ACIP " + s.getText());
+                                            duff = new Object[] { TibetanMachineWeb.getGlyph(wy) };
+                                        }
                                    }
                                }
                            }
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@ -157,6 +157,9 @@ public class ACIPRules {
        getWylieForACIPOther(null);
        getWylieForACIPVowel(null);
        String ans = (String)wylieToACIP.get(EWTS);
+        boolean useCapitalW = false;
+        if (EWTS.startsWith("w"))
+            useCapitalW = true; // We want W+NA, not V+NA; we want WA, not VA.
        if (null == ans) {
            StringBuffer finalAns = new StringBuffer(EWTS.length());
            StringTokenizer sTok = new StringTokenizer(EWTS, "-+", true);
@ -182,9 +185,14 @@ public class ACIPRules {
                if (null == part) return null;
                finalAns.append(part);
            }
+            if (useCapitalW)
+                finalAns.setCharAt(0, 'W');
            return finalAns.toString();
        }
-        return ans;
+        if (useCapitalW)
+            return "W" + ans.substring(1);
+        else
+            return ans;
    }

    /** Registers acip->wylie mappings in toWylie; registers
@ -193,6 +201,12 @@ public class ACIPRules {
        toWylie.put(ACIP, EWTS);
        if (null == wylieToACIP) {
            wylieToACIP = new HashMap(75);
+
+            // We don't want to put "/" in toWylie:
+            wylieToACIP.put("(", "/");
+            wylieToACIP.put(")", "/");
+            wylieToACIP.put("?", "\\");
+
            wylieToACIP.put("_", " "); // oddball.
            wylieToACIP.put("o'i", "O'I"); // oddball for TMW9.61.
        }
@ -307,14 +321,20 @@ public class ACIPRules {
        if (acipOther2wylie == null) {
            acipOther2wylie = new HashMap(20);

+            // don't use putMapping for this.  We don't want TMW->ACIP
+            // to produce "." for a U+0F0C because ACIP doesn't say
+            // that "." means U+0F0C.  It just seems to in practice
+            // for ACIP Release IV texts.
+            acipOther2wylie.put(".", "*");
+
+            putMapping(acipOther2wylie, "m", "M");
+            putMapping(acipOther2wylie, ":", "H");
            putMapping(acipOther2wylie, ",", "/");
            putMapping(acipOther2wylie, " ", " ");
-            putMapping(acipOther2wylie, ".", "*");
-            putMapping(acipOther2wylie, "|", "|");
+            putMapping(acipOther2wylie, ";", "|");
            putMapping(acipOther2wylie, "`", "!");
-            putMapping(acipOther2wylie, ";", ";");
-            putMapping(acipOther2wylie, "*", "@");
-            putMapping(acipOther2wylie, "#", "@#");
+            putMapping(acipOther2wylie, "*", "@#");
+            // There is no glyph in TMW with the EWTS @##, so we don't do this: putMapping(acipOther2wylie, "#", "@##");
            putMapping(acipOther2wylie, "%", "~X");
            putMapping(acipOther2wylie, "o", "X");
            putMapping(acipOther2wylie, "&", "&");
--- a/source/org/thdl/tib/text/ttt/TParseTree.java
+++ b/source/org/thdl/tib/text/ttt/TParseTree.java
@ -359,6 +359,7 @@ class TParseTree {
                        }
                    }
                    if (stackSize > 1 && tp.getLeft() != null && tp.getLeft().length() > 1) {
+                        // DLC FIXME: gives a false positive warning for Rsh
                        hasAmbiguousConsonant = true;
                    }
                }