ACIP->Unicode, without going through TMW, is now possible, so long as

\, the Sanskrit virama, is not used. Of the 1370-odd ACIP texts I've got here, about 57% make it through the gauntlet (fewer if you demand a vowel or disambiguator on every stack of a non-Tibetan tsheg bar).
2003-08-18 02:38:54 +00:00 · 2003-08-18 02:38:54 +00:00 · 1afb3a0fdd
commit 1afb3a0fdd
parent 245aac4911
12 changed files with 646 additions and 40 deletions
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@@ -167,4 +167,19 @@ class TPair {
        if (null == rightWylie) rightWylie = "";
        return leftWylie + rightWylie;
    }
+
+    /** Appends to sb the Unicode for this (possibly subscribed)
+     *  pair: first the left half, then the right half.  A half is
+     *  skipped when it is null or when ACIPRules.getUnicodeFor
+     *  returns null for it.  A right half equal to "-" or "A" is
+     *  never looked up and contributes nothing.  DLC FIXME: which
+     *  normalization form, if any?
+     *  @param sb the buffer that receives the appended text
+     *  @param subscribed passed through unchanged to
+     *  ACIPRules.getUnicodeFor for both halves */
+    void getUnicode(StringBuffer sb, boolean subscribed) {
+        if (getLeft() != null) {
+            String leftUnicode
+                = ACIPRules.getUnicodeFor(getLeft(), subscribed);
+            if (leftUnicode != null) {
+                sb.append(leftUnicode);
+            }
+        }
+
+        // Nothing to emit for an absent right half, nor for "-" or
+        // "A" (presumably the disambiguator and the implicit vowel
+        // -- TODO confirm against ACIPRules).
+        if (getRight() == null
+            || "-".equals(getRight())
+            || "A".equals(getRight())) {
+            return;
+        }
+
+        String rightUnicode
+            = ACIPRules.getUnicodeFor(getRight(), subscribed);
+        if (rightUnicode != null) {
+            sb.append(rightUnicode);
+        }
+    }
 }