ACIP->Unicode, without going through TMW, is now possible, so long as \, the Sanskrit virama, is not used.

Of the 1370-odd ACIP texts I've got here, about 57% make it through the gauntlet (fewer if you demand a vowel or disambiguator on every stack of a non-Tibetan tsheg bar).
2003-08-18 02:38:54 +00:00 · 2003-08-18 02:38:54 +00:00 · 1afb3a0fdd
commit 1afb3a0fdd
parent 245aac4911
12 changed files with 646 additions and 40 deletions
--- a/source/org/thdl/tib/text/ttt/ACIPString.java
+++ b/source/org/thdl/tib/text/ttt/ACIPString.java
@@ -30,6 +30,15 @@ public class ACIPString {
    private int type;
    private String text;

+    /** Returns true if and only if an ACIPString with type type is to
+     *  be converted to Latin, not Tibetan, text. */
+    public static boolean isLatin(int type) {
+        return (type != TIBETAN_NON_PUNCTUATION
+                && type != TIBETAN_PUNCTUATION
+                && type != START_SLASH
+                && type != END_SLASH);
+    }
+
    /** For [#COMMENTS] */
    public static final int COMMENT = 0;
    /** For Folio markers like @012B */