ACIP->Unicode, without going through TMW, is now possible, so long as
\, the Sanskrit virama, is not used. Of the 1370-odd ACIP texts I've got here, about 57% make it through the gauntlet (fewer if you demand a vowel or disambiguator on every stack of a non-Tibetan tsheg bar).
This commit is contained in:
parent
245aac4911
commit
1afb3a0fdd
12 changed files with 646 additions and 40 deletions
|
@ -30,6 +30,15 @@ public class ACIPString {
|
|||
private int type;
|
||||
private String text;
|
||||
|
||||
/** Returns true if and only if an ACIPString with type type is to
|
||||
* be converted to Latin, not Tibetan, text. */
|
||||
public static boolean isLatin(int type) {
|
||||
return (type != TIBETAN_NON_PUNCTUATION
|
||||
&& type != TIBETAN_PUNCTUATION
|
||||
&& type != START_SLASH
|
||||
&& type != END_SLASH);
|
||||
}
|
||||
|
||||
/** For [#COMMENTS] */
|
||||
public static final int COMMENT = 0;
|
||||
/** For Folio markers like @012B */
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue