ACIP->Unicode, without going through TMW, is now possible, so long as

\, the Sanskrit virama, is not used.  Of the 1370-odd ACIP texts I've
got here, about 57% make it through the gauntlet (fewer if you demand
a vowel or disambiguator on every stack of a non-Tibetan tsheg bar).
This commit is contained in:
dchandler 2003-08-18 02:38:54 +00:00
parent 245aac4911
commit 1afb3a0fdd
12 changed files with 646 additions and 40 deletions

View file

@ -341,7 +341,7 @@ public final class LegalTshegBar
EWC_ta, EWC_tha, EWC_da, EWC_na,
EWC_pa, EWC_pha, EWC_ba, EWC_ma,
EWC_tsa, EWC_tsha, EWC_dza, EWC_wa,
EWC_zha, EWC_za, EWC_achung, EWC_ya,
EWC_zha, EWC_za, EWC_achung, EWC_ya,
EWC_ra, EWC_la, EWC_sha, EWC_sa,
EWC_ha, EWC_a
});
@ -833,7 +833,7 @@ public final class LegalTshegBar
return internalThrowThing(throwIfIllegal,
errorBuf,
"Illegal suffix -- not one of the ten legal suffixes: "
+ UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
+ UnicodeUtils.unicodeCodepointToString(suffix.charAt(0), false));
}
}
}