ACIP->Unicode, without going through TMW, is now possible, so long as \, the Sanskrit virama, is not used.

Of the 1370-odd ACIP texts I've got here, about 57% make it through the gauntlet (fewer if you demand a vowel or disambiguator on every stack of a non-Tibetan tsheg bar).
2003-08-18 02:38:54 +00:00 · 2003-08-18 02:38:54 +00:00 · 1afb3a0fdd
commit 1afb3a0fdd
parent 245aac4911
12 changed files with 646 additions and 40 deletions
--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
@@ -341,7 +341,7 @@ public final class LegalTshegBar
            EWC_ta,  EWC_tha,  EWC_da,     EWC_na,
            EWC_pa,  EWC_pha,  EWC_ba,     EWC_ma,
            EWC_tsa, EWC_tsha, EWC_dza,    EWC_wa,
-            EWC_zha, EWC_za,   EWC_achung,  EWC_ya,
+            EWC_zha, EWC_za,   EWC_achung, EWC_ya,
            EWC_ra,  EWC_la,   EWC_sha,    EWC_sa,
            EWC_ha,  EWC_a
        });
@@ -833,7 +833,7 @@ public final class LegalTshegBar
                    return internalThrowThing(throwIfIllegal,
                                              errorBuf,
                                              "Illegal suffix -- not one of the ten legal suffixes: "
-                                              + UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
+                                              + UnicodeUtils.unicodeCodepointToString(suffix.charAt(0), false));
                }
            }
        }