diff --git a/source/org/thdl/tib/text/DuffCode.java b/source/org/thdl/tib/text/DuffCode.java
index 01db664..dc04e92 100644
--- a/source/org/thdl/tib/text/DuffCode.java
+++ b/source/org/thdl/tib/text/DuffCode.java
@@ -26,23 +26,27 @@ import org.thdl.util.ThdlDebug;
* An immutable representation of a Tibetan glyph in the
* TibetanMachineWeb or TibetanMachine families of fonts.
*
-* A DuffCode consists of a font number, a character, and a character
-* number. A font identification and a character are sufficient to
-* uniquely identify any TibetanMachineWeb or TibetanMachine glyph.
+* <p>A DuffCode consists of a font number, a character, and a
+* character number. A font identification and a character are
+* sufficient to uniquely identify any TibetanMachineWeb or
+* TibetanMachine glyph. Whether a DuffCode represents a TM or TMW
+* glyph is in the eye of the beholder -- such information is not
+* intrinsically represented.
*
* @author Edward Garrett, Tibetan and Himalayan Digital Library
-* @version 1.0 */
+* @author David Chandler */
public final class DuffCode {
/**
-* the font number in which this glyph can be found,
-* from 1 (TibetanMachineWeb) to 10 (TibetanMachineWeb9).
-*/
- private int fontNum;
+* the font number in which this glyph can be found, from 1
+* (TibetanMachineWeb/TibetanMachine) ... to 5
+* (TibetanMachineWeb4/TibetanMachineSkt4) ... to 10
+* (TibetanMachineWeb9/[Invalid for TM family]). */
+ private byte fontNum;
/**
* the character value of this glyph, as an integer (that is, ordinal)
*/
- private int charNum;
+ private byte charNum;
/**
* Called by {@link TibetanMachineWeb} to generate
@@ -53,32 +57,33 @@ public final class DuffCode {
* and the other is the ASCII code of the character.
*
* @param s the string to parse
-* @param leftToRight should be true if the first number is the font number,
-* false if the second number is the font number
-*/
- public DuffCode(String s, boolean leftToRight) {
- StringTokenizer st = new StringTokenizer(s,",");
+* @param leftToRight should be true if the first number is the font
+* number, false if the second number is the font number */
+ public DuffCode(String s, boolean leftToRight) {
+ StringTokenizer st = new StringTokenizer(s,",");
- try {
- String val1 = st.nextToken();
- String val2 = st.nextToken();
+ try {
+ String val1 = st.nextToken();
+ String val2 = st.nextToken();
- Integer num1 = new Integer(val1);
- Integer num2 = new Integer(val2);
+ Integer num1 = new Integer(val1);
+ Integer num2 = new Integer(val2);
+ int n1val = num1.intValue();
+ int n2val = num2.intValue();
+ if (n1val > 255 || n1val < 0 || n2val > 255 || n2val < 0)
+ throw new NumberFormatException("FAILED ASSERTION: 0<=fontNum<=255 and 0<=charNum<=255");
- if (leftToRight) {
- setFontNum(num1.intValue());
- charNum = num2.intValue();
- }
- else {
- setFontNum(num2.intValue());
- charNum = num1.intValue();
- }
- }
- catch (NumberFormatException e) {
+ if (leftToRight) {
+ setFontNum(n1val);
+ setCharNum((char)n2val);
+ } else {
+ setFontNum(n2val);
+ setCharNum((char)n1val);
+ }
+ } catch (NumberFormatException e) {
ThdlDebug.noteIffyCode();
- }
- }
+ }
+ }
/**
* Called to create DuffCodes on the fly
@@ -87,50 +92,61 @@ public final class DuffCode {
* @param font the identifying number of the font
* @param ch a character
*/
- public DuffCode(int font, char ch) {
- setFontNum(font);
- charNum = (int)ch;
- }
+ public DuffCode(int font, char ch) {
+ setFontNum(font);
+ setCharNum(ch);
+ }
private void setFontNum(int font) {
if (!(font >= 1 && font <= 10))
throw new IllegalArgumentException("DuffCodes work with font numbers in the range [1, 5] or [1, 10]. This isn't in the range [1, 10]: " + font);
- fontNum = font;
+ fontNum = (byte)font;
}
/**
* Gets the font number of this glyph.
* @return the identifying font number for this DuffCode
*/
- public int getFontNum() {
- return fontNum;
- }
+ public byte getFontNum() {
+ return fontNum;
+ }
+
+ private void setCharNum(char x) {
+     // Packs the glyph's ordinal (0..255) into the single-byte field
+     // charNum.  A byte cannot hold anything larger, and narrowing a
+     // bigger value would silently alias some other glyph, so reject
+     // it up front the same way setFontNum rejects a bad font number.
+     if (x > 255)
+         throw new IllegalArgumentException("DuffCodes work with character ordinals in the range [0, 255]. This isn't in the range [0, 255]: " + (int)x);
+     int ordinal = x;
+     if (ordinal <= 127)
+         charNum = (byte)ordinal; // [0, 127] is stored as-is
+     else
+         charNum = (byte)(127-ordinal); // [128, 255] is stored as [-1, -128]; getCharNum() inverts this
+ }
/**
* Gets the character for this glyph, as an integer.
* @return the identifying character, converted to an
* integer, for this DuffCode
*/
- public int getCharNum() {
- return charNum;
- }
+ public short getCharNum() { // decodes the byte-packed ordinal written by setCharNum
+ if (charNum >= 0)
+ return (short)charNum; // stored as-is: ordinal in [0, 127]
+ else
+ return (short)(127-(short)charNum); // stored as 127-ordinal (-1..-128), so this recovers [128, 255]
+ }
/**
* Gets the character for this glyph.
* @return the identifying character for this DuffCode
*/
- public char getCharacter() {
- return (char)charNum;
- }
+ public char getCharacter() {
+ return (char)getCharNum();
+ }
/**
* Assigns a hashcode based on the font number and character for this
* glyph.
*
* @return the hash code for this object */
- public int hashCode() {
- return fontNum*256 + charNum;
- }
+ public int hashCode() {
+ return ((int)fontNum)*256 + getCharNum();
+ }
/**
* Evaluates two DuffCodes as equal iff their
@@ -139,40 +155,40 @@ public final class DuffCode {
* @param o the object (DuffCode) you want to compare
* @return true if this object is equal to o, false if not
*/
- public boolean equals(Object o) {
- if (o instanceof DuffCode) {
- DuffCode dc = (DuffCode)o;
+ public boolean equals(Object o) {
+ if (o instanceof DuffCode) {
+ DuffCode dc = (DuffCode)o;
- if (fontNum == dc.fontNum && charNum == dc.charNum)
- return true;
- }
- return false;
- }
+ if (fontNum == dc.fontNum && charNum == dc.charNum)
+ return true;
+ }
+ return false;
+ }
/**
* @return a string representation of this object */
- public String toString() {
+ public String toString() {
boolean[] err = new boolean[] { false };
String wylie = TibetanMachineWeb.getWylieForGlyph(this, err);
if (err[0]) wylie = "undefined";
- return "";
- }
+ }
/**
* @param TMW if this DuffCode represents a TMW glyph, not a TM glyph
* @return a string representation of this object that does not refer
* to its Wylie representation (because the TMW->Wylie error messages
* call upon this when there is none, and you don't want an infinite
* recursion (manifesting as a StackOverflowError)) */
- public String toString(boolean TMW) {
+ public String toString(boolean TMW) {
boolean[] err = new boolean[] { false };
- return "";
- }
+ }
}
diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java
index abb016a..b1d6cef 100644
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@@ -31,7 +31,7 @@ import org.thdl.util.ThdlDebug;
import org.thdl.util.ThdlLazyException;
import org.thdl.util.Trie;
import org.thdl.util.ThdlOptions;
-import org.thdl.tib.text.tshegbar.UnicodeCodepointToThdlWylie;
+import org.thdl.tib.text.tshegbar.UnicodeUtils;
/**
* Interfaces between Extended Wylie and the TibetanMachineWeb fonts.
@@ -41,9 +41,11 @@ import org.thdl.tib.text.tshegbar.UnicodeCodepointToThdlWylie;
* both or neither.
*
* <p>In addition, this class optionally loads the TibetanMachineWeb
-* fonts manually via {@link #readInTMWFontFiles()}.
+* fonts manually via {@link #readInTMWFontFiles()}. When we do that,
+* it means that users don't have to install the fonts on their
+* systems, so installation of Jskad becomes easier.
* @author Edward Garrett, Tibetan and Himalayan Digital Library
-* @version 1.0
+* @author David Chandler
*/
public class TibetanMachineWeb implements THDLWylieConstants {
/** This addresses bug 624133, "Input freezes after impossible
@@ -57,124 +59,132 @@ public class TibetanMachineWeb implements THDLWylieConstants {
private final static String anyOldObjectWillDo
= "this placeholder is useful for debugging; we need a nonnull Object anyway";
- private static TibetanKeyboard keyboard = null;
- private static Set charSet = null;
- private static Set tibSet = null;
- private static Set sanskritStackSet = null;
- private static Set numberSet = null;
- private static Set vowelSet = null;
- private static Set puncSet = null;
- private static Set topSet = null;
- private static Set leftSet = null;
- private static Set rightSet = null;
- private static Set farRightSet = null;
- private static Map tibHash = new HashMap();
- private static Map binduMap = new HashMap();
- private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
- private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM
- private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW
- private static String[][] TMWtoUnicode = new String[10][127-32]; // ordinal 127 doesn't occur in TMW
- private static String fileName = "tibwn.ini";
- private static final String DELIMITER = "~";
+ private static TibetanKeyboard keyboard = null;
+ private static Set charSet = null;
+ private static Set tibSet = null;
+ private static Set sanskritStackSet = null;
+ private static Set numberSet = null;
+ private static Set vowelSet = null;
+ private static Set puncSet = null;
+ private static Set topSet = null;
+ private static Set leftSet = null;
+ private static Set rightSet = null;
+ private static Set farRightSet = null;
+ private static Map tibHash = new HashMap();
+ private static Map binduMap = new HashMap();
+ private static String[][] toHashKey = new String[11][95]; //note: toHashKey[0][..] is not used
+ private static DuffCode[][] TMtoTMW = new DuffCode[5][255-32]; // ordinal 255 doesn't occur in TM
+ private static DuffCode[][] TMWtoTM = new DuffCode[10][127-32]; // ordinal 127 doesn't occur in TMW
+ private static String[][] TMWtoUnicode = new String[10][127-32]; // ordinal 127 doesn't occur in TMW
+
+ /** For mapping single codepoints U+0F00..U+0FFF to TMW. This
+ won't handle 0F00, 0F02, 0F03, or 0F0E, which are made by
+ using multiple glyphs from TMW, but it handles all the rest.
+ It handles U+0F90-U+0FBC rather poorly, in that you have to
+ use special formatting to get those right (FIXME: warn
+ whenever they're used). */
+ private static DuffCode[][] UnicodeToTMW = new DuffCode[256][1];
+ private static String fileName = "tibwn.ini";
+ private static final String DELIMITER = "~";
/** vowels that appear over the glyph: */
- private static Set top_vowels;
+ private static Set top_vowels;
/** the font we use when we convert TMW->Unicode: */
- private static SimpleAttributeSet defaultUnicodeFontAttributeSet = null;
+ private static SimpleAttributeSet defaultUnicodeFontAttributeSet = null;
/** a way of encoding the choice of TibetanMachineWeb font from
that family of 10 fonts: */
- private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11];
+ private static SimpleAttributeSet[] webFontAttributeSet = new SimpleAttributeSet[11];
/** a way of encoding the choice of TibetanMachine font from
that family of 5 fonts: */
- private static SimpleAttributeSet[] normFontAttributeSet = new SimpleAttributeSet[6];
- private static boolean hasDisambiguatingKey; //to disambiguate gy and g.y=
- private static char disambiguating_key;
- private static boolean hasSanskritStackingKey; //for stacking Sanskrit
- private static boolean hasTibetanStackingKey; //for stacking Tibetan
- private static boolean isStackingMedial; //ie g+y, not +gy
- private static char stacking_key;
- private static boolean isAChenRequiredBeforeVowel;
- private static boolean isAChungConsonant;
- private static boolean hasAVowel;
- private static String aVowel;
+ private static SimpleAttributeSet[] normFontAttributeSet = new SimpleAttributeSet[6];
+ private static boolean hasDisambiguatingKey; //to disambiguate gy and g.y=
+ private static char disambiguating_key;
+ private static boolean hasSanskritStackingKey; //for stacking Sanskrit
+ private static boolean hasTibetanStackingKey; //for stacking Tibetan
+ private static boolean isStackingMedial; //ie g+y, not +gy
+ private static char stacking_key;
+ private static boolean isAChenRequiredBeforeVowel;
+ private static boolean isAChungConsonant;
+ private static boolean hasAVowel;
+ private static String aVowel;
// We use .intern() explicitly here so the code is easier to
// understand, but all string literals are interned.
- public static final String[] tmFontNames = {
- null,
- "TibetanMachine".intern(),
- "TibetanMachineSkt1".intern(),
- "TibetanMachineSkt2".intern(),
- "TibetanMachineSkt3".intern(),
- "TibetanMachineSkt4".intern()
- };
- public static final String[] tmwFontNames = {
- null,
- "TibetanMachineWeb".intern(),
- "TibetanMachineWeb1".intern(),
- "TibetanMachineWeb2".intern(),
- "TibetanMachineWeb3".intern(),
- "TibetanMachineWeb4".intern(),
- "TibetanMachineWeb5".intern(),
- "TibetanMachineWeb6".intern(),
- "TibetanMachineWeb7".intern(),
- "TibetanMachineWeb8".intern(),
- "TibetanMachineWeb9".intern()
- };
+ public static final String[] tmFontNames = {
+ null,
+ "TibetanMachine".intern(),
+ "TibetanMachineSkt1".intern(),
+ "TibetanMachineSkt2".intern(),
+ "TibetanMachineSkt3".intern(),
+ "TibetanMachineSkt4".intern()
+ };
+ public static final String[] tmwFontNames = {
+ null,
+ "TibetanMachineWeb".intern(),
+ "TibetanMachineWeb1".intern(),
+ "TibetanMachineWeb2".intern(),
+ "TibetanMachineWeb3".intern(),
+ "TibetanMachineWeb4".intern(),
+ "TibetanMachineWeb5".intern(),
+ "TibetanMachineWeb6".intern(),
+ "TibetanMachineWeb7".intern(),
+ "TibetanMachineWeb8".intern(),
+ "TibetanMachineWeb9".intern()
+ };
/**
* represents where in an array of DuffCodes you
* find the TibetanMachine equivalence of a glyph
*/
- public static final int TM = 0;
+ public static final int TM = 0;
/**
* represents where in an array of DuffCodes you
* find the reduced character equivalent of a TMW glyph
*/
- public static final int REDUCED_C = 1;
+ public static final int REDUCED_C = 1;
/**
* represents where in an array of DuffCodes you
* find the TibetanMachineWeb glyph
*/
- public static final int TMW = 2;
+ public static final int TMW = 2;
/**
* represents where in an array of DuffCodes you
* find the gigu value for a given glyph
*/
- public static final int VOWEL_i = 3;
+ public static final int VOWEL_i = 3;
/**
* represents where in an array of DuffCodes you
* find the zhebju value for a given glyph
*/
- public static final int VOWEL_u = 4;
+ public static final int VOWEL_u = 4;
/**
* represents where in an array of DuffCodes you
* find the drengbu value for a given glyph
*/
- public static final int VOWEL_e = 5;
+ public static final int VOWEL_e = 5;
/**
* represents where in an array of DuffCodes you
* find the naro value for a given glyph
*/
- public static final int VOWEL_o = 6;
+ public static final int VOWEL_o = 6;
/**
* represents where in an array of DuffCodes you
* find the achung value for a given glyph
*/
- public static final int VOWEL_A = 7;
+ public static final int VOWEL_A = 7;
/**
* represents where in an array of DuffCodes you
* find the achung + zhebju value for a given glyph
*/
- public static final int VOWEL_U = 8;
+ public static final int VOWEL_U = 8;
/**
* represents where in an array of DuffCodes you
* find the Unicode equivalence of a given glyph
*/
- public static final int UNICODE = 9;
+ public static final int UNICODE = 9;
/**
* represents where in an array of DuffCodes you
* find the half height equivalence of a given glyph
*/
- public static final int HALF_C = 10;
+ public static final int HALF_C = 10;
@@ -184,50 +194,50 @@ public class TibetanMachineWeb implements THDLWylieConstants {
// change TMW->Wylie.
/** comma-delimited list of supported Tibetan consonants: */
- private static final String tibetanConsonants
+ private static final String tibetanConsonants
= "k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a";
/** comma-delimited list of supported non-Tibetan consonants, such
* as Sanskrit consonants: */
- private static final String otherConsonants // va and fa are treated pretty-much like Sanskrit.
+ private static final String otherConsonants // va and fa are treated pretty-much like Sanskrit.
= "T,Th,D,N,Sh,v,f";
/** comma-delimited list of supported numbers (superscribed,
subscribed, normal, half-numerals): */
- private static final String numbers
+ private static final String numbers
= "0,1,2,3,4,5,6,7,8,9";
/** comma-delimited list of supported punctuation and
miscellaneous characters: */
- private static final String others
+ private static final String others
= "_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],{,},*,~X,X"; // FIXME: not yet supporting all these...
/** comma-delimited list of supported vowels: */
- private static final String vowels
+ private static final String vowels
= "a,i,u,e,o,I,U,ai,au,A,-i,-I";
/** comma-delimited list of head letters (superscribed letters) */
- private static final String tops = "r,s,l";
+ private static final String tops = "r,s,l";
/** comma-delimited list of prefixes */
- private static final String lefts = "g,d,b,m,'";
+ private static final String lefts = "g,d,b,m,'";
/** comma-delimited list of suffixes */
- private static final String rights = "g,ng,d,n,b,m,r,l,s,',T";
+ private static final String rights = "g,ng,d,n,b,m,r,l,s,',T";
/** comma-delimited list of postsuffixes. nga was here in the
* past, according to Edward, to handle cases like ya'ng. pa'am
* wasn't considered, but had it been, ma probably would've gone
* here too. We now handle 'am, 'ang, etc. specially, so now
* this set is now just the postsuffixes. */
- private static final String farrights = "d,s";
+ private static final String farrights = "d,s";
- static {
- readData();
+ static {
+ readData();
/* Initialize to Extended Wylie keyboard. The preferences
* mechanism will switch this to the preferred keyboard. */
setKeyboard(keyboard);
- }
+ }
/** If the TMW font files are resources associated with this
* class, those font files are loaded. This means that the user
@@ -311,7 +321,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
* the character, punctuation, and vowel lists, as well as
* performing other acts of initialization.
*/
- private static void readData() {
+ private static void readData() {
if (!ThdlOptions.getBooleanOption("thdl.rely.on.system.tmw.fonts")) {
readInTMWFontFiles();
}
@@ -323,86 +333,86 @@ public class TibetanMachineWeb implements THDLWylieConstants {
StyleConstants.setFontFamily(defaultUnicodeFontAttributeSet,
"Ximalaya");
- webFontAttributeSet[0] = null;
- for (int i=1; i so that Jskad has the same
// TMW->Wylie conversion regardless of whether or not it
// chooses to support inputting numbers. Likewise for
// tibetanConsonants, otherConsonants, others, and vowels.
String ntk;
- charSet.add(ntk = sTok.nextToken());
+ charSet.add(ntk = sTok.nextToken());
numberSet.add(ntk);
validInputSequences.put(ntk, anyOldObjectWillDo);
}
@@ -419,165 +429,165 @@ public class TibetanMachineWeb implements THDLWylieConstants {
sTok = null;
- top_vowels = new HashSet();
- top_vowels.add(i_VOWEL);
- top_vowels.add(e_VOWEL);
- top_vowels.add(o_VOWEL);
- top_vowels.add(ai_VOWEL);
- top_vowels.add(au_VOWEL);
- top_vowels.add(reverse_i_VOWEL);
+ top_vowels = new HashSet();
+ top_vowels.add(i_VOWEL);
+ top_vowels.add(e_VOWEL);
+ top_vowels.add(o_VOWEL);
+ top_vowels.add(ai_VOWEL);
+ top_vowels.add(au_VOWEL);
+ top_vowels.add(reverse_i_VOWEL);
- try {
- URL url = TibetanMachineWeb.class.getResource(fileName);
- if (url == null) {
- System.err.println("Cannot find " + fileName + "; aborting.");
- System.exit(1);
- }
- InputStreamReader isr = new InputStreamReader(url.openStream());
- BufferedReader in = new BufferedReader(isr);
+ try {
+ URL url = TibetanMachineWeb.class.getResource(fileName);
+ if (url == null) {
+ System.err.println("Cannot find " + fileName + "; aborting.");
+ System.exit(1);
+ }
+ InputStreamReader isr = new InputStreamReader(url.openStream());
+ BufferedReader in = new BufferedReader(isr);
if (ThdlOptions.getBooleanOption("thdl.verbose")) {
System.out.println("Reading Tibetan Machine Web code table "
+ fileName);
}
- String line;
- boolean hashOn = false;
+ String line;
+ boolean hashOn = false;
// is this a Tibetan consonant or consonant stack?
- boolean isTibetan = false;
+ boolean isTibetan = false;
// is this a Sanskrit consonant stack?
- boolean isSanskrit = false;
+ boolean isSanskrit = false;
- boolean ignore = false;
+ boolean ignore = false;
- while ((line = in.readLine()) != null) {
- if (line.startsWith("")) { //line is command
- if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = true;
- hashOn = false;
+ while ((line = in.readLine()) != null) {
+ if (line.startsWith("")) { //line is command
+ if (line.equalsIgnoreCase("")) {
+ isSanskrit = false;
+ isTibetan = true;
+ hashOn = false;
ignore = false;
do {
line = in.readLine();
} while (line.startsWith("//") || line.equals(""));
// use tibSet or charSet; ignore this.
}
- else if (line.equalsIgnoreCase("")) {
+ else if (line.equalsIgnoreCase("")) {
// FIXME: for historical reasons, numbers go
// in both charSet and numberSet.
- isSanskrit = false;
- isTibetan = false;
- hashOn = false;
+ isSanskrit = false;
+ isTibetan = false;
+ hashOn = false;
ignore = false;
do {
line = in.readLine();
} while (line.startsWith("//") || line.equals(""));
// use numberSet or charSet; ignore this.
}
- else if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = false;
- hashOn = false;
+ else if (line.equalsIgnoreCase("")) {
+ isSanskrit = false;
+ isTibetan = false;
+ hashOn = false;
ignore = false;
do {
line = in.readLine();
} while (line.startsWith("//") || line.equals(""));
// use vowelSet; ignore this.
- }
- else if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = false;
- hashOn = false;
+ }
+ else if (line.equalsIgnoreCase("")) {
+ isSanskrit = false;
+ isTibetan = false;
+ hashOn = false;
ignore = false;
do {
line = in.readLine();
} while (line.startsWith("//") || line.equals(""));
// use puncSet; ignore this.
- }
+ }
- else if (line.equalsIgnoreCase("")
+ else if (line.equalsIgnoreCase("")
|| line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = false;
- hashOn = true;
- ignore = false;
+ isSanskrit = false;
+ isTibetan = false;
+ hashOn = true;
+ ignore = false;
}
else if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = true;
- hashOn = true;
- ignore = false;
- }
- else if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = false;
- hashOn = true;
- ignore = false;
- }
- else if (line.equalsIgnoreCase("")) {
- isSanskrit = true;
- isTibetan = false;
- hashOn = true;
- ignore = false;
- }
- else if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- isTibetan = false;
- hashOn = false;
- ignore = false;
- }
- else if (line.equalsIgnoreCase("")) {
- isSanskrit = false;
- ignore = true;
+ isSanskrit = false;
+ isTibetan = true;
+ hashOn = true;
+ ignore = false;
+ }
+ else if (line.equalsIgnoreCase("")) {
+ isSanskrit = false;
+ isTibetan = false;
+ hashOn = true;
+ ignore = false;
+ }
+ else if (line.equalsIgnoreCase("")) {
+ isSanskrit = true;
+ isTibetan = false;
+ hashOn = true;
+ ignore = false;
+ }
+ else if (line.equalsIgnoreCase("")) {
+ isSanskrit = false;
+ isTibetan = false;
+ hashOn = false;
+ ignore = false;
+ }
+ else if (line.equalsIgnoreCase("")) {
+ isSanskrit = false;
+ ignore = true;
}
- }
- else if (line.startsWith("//")) { //comment
- ;
}
- else if (line.equals("")) {//empty string
- ;
+ else if (line.startsWith("//")) { //comment
+ ;
}
- else {
- StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
+ else if (line.equals("")) {//empty string
+ ;
+ }
+ else {
+ StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
- String wylie = null;
+ String wylie = null;
DuffCode[] duffCodes;
duffCodes = new DuffCode[11];
- int k = 0;
+ int k = 0;
StringBuffer escapedToken = new StringBuffer("");
ThdlDebug.verify(escapedToken.length() == 0);
- while (st.hasMoreTokens()) {
- String val = getEscapedToken(st, escapedToken);
+ while (st.hasMoreTokens()) {
+ String val = getEscapedToken(st, escapedToken);
- if (val.equals(DELIMITER)
+ if (val.equals(DELIMITER)
&& escapedToken.length() == 0) {
- k++;
+ k++;
} else if (!val.equals("")) {
if (escapedToken.length() != 0) {
escapedToken = new StringBuffer("");
ThdlDebug.verify(escapedToken.length() == 0);
}
- switch (k) {
- case 0: //wylie key
+ switch (k) {
+ case 0: //wylie key
wylie = val;
- break;
+ break;
- case 1: // Tibetan Machine glyph
- duffCodes[TM] = new DuffCode(val,false);
- break;
+ case 1: // Tibetan Machine glyph
+ duffCodes[TM] = new DuffCode(val,false);
+ break;
- case 2: //reduced-size character if there is one
+ case 2: //reduced-size character if there is one
if (!ignore) {
duffCodes[REDUCED_C] = new DuffCode(val,true);
}
- break;
+ break;
- case 3: //TibetanMachineWeb code
- duffCodes[TMW] = new DuffCode(val,true);
+ case 3: //TibetanMachineWeb code
+ duffCodes[TMW] = new DuffCode(val,true);
// TibetanMachineWeb7.91, for
// example, has no TM(win32)
// equivalent (though it has a
@@ -593,14 +603,14 @@ public class TibetanMachineWeb implements THDLWylieConstants {
// could well be null):
TMWtoTM[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32]
= duffCodes[TM]; // TMW->TM mapping
- break;
+ break;
// Vowels etc. to use with this glyph:
- case 4:
- case 5:
- case 6:
- case 7:
- case 8:
- case 9:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 9:
if (!ignore) {
try {
duffCodes[k-1] = new DuffCode(val,true);
@@ -610,7 +620,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
break;
- case 10: //Unicode:
+ case 10: //Unicode:
if (!val.equals("none")) {
StringBuffer unicodeBuffer = new StringBuffer();
StringTokenizer uTok = new StringTokenizer(val, ",");
@@ -632,13 +642,51 @@ public class TibetanMachineWeb implements THDLWylieConstants {
}
TMWtoUnicode[duffCodes[TMW].getFontNum()-1][duffCodes[TMW].getCharNum()-32]
= unicodeBuffer.toString(); // TMW->Unicode mapping
+ char ch;
+ if (unicodeBuffer.length() == 1
+ && UnicodeUtils.isInTibetanRange(ch = unicodeBuffer.charAt(0))) {
+ if (null != UnicodeToTMW[ch - '\u0F00'][0]
+ && '\u0F00' != ch
+ && '\u0F02' != ch
+ && '\u0F03' != ch
+ && '\u0F0B' != ch // any will do...
+ && '\u0F0E' != ch
+ && '\u0F40' != ch
+ && '\u0F42' != ch
+ && '\u0F49' != ch
+ && '\u0F4F' != ch
+ && '\u0F51' != ch
+ && '\u0F53' != ch
+ && '\u0F5E' != ch
+ && '\u0F62' != ch
+ && '\u0F64' != ch
+ && '\u0F67' != ch
+ && '\u0F6A' != ch
+ && '\u0F71' != ch // any will do...
+ && '\u0F72' != ch // any will do...
+ && '\u0F73' != ch
+ && '\u0F74' != ch // any will do...
+ && '\u0F75' != ch // any will do...
+ && '\u0F76' != ch
+ && '\u0F77' != ch
+ && '\u0F78' != ch
+ && '\u0F79' != ch
+ && '\u0F7A' != ch // any will do...
+ && '\u0F7C' != ch // any will do...
+ && '\u0F7E' != ch
+ && '\u0F81' != ch) {
+ throw new Error("tibwn.ini has more than one TMW fellow listed that has the Unicode " + val + ", but it's not on the list of specially handled glyphs");
+ }
+ UnicodeToTMW[ch - '\u0F00'][0]
+ = duffCodes[TMW]; // Unicode->TMW mapping
+ }
// For V&V:
// DLC FIXME: also check for ^[90-bc]. and ^.+[40-6a]
// StringBuffer wylie_minus_plusses_buf
-// = UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(unicodeBuffer.toString());
+// = org.thdl.tib.text.tshegbar.UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeString(unicodeBuffer.toString());
// String wylie_minus_plusses
// = ((wylie_minus_plusses_buf == null)
// ? null
@@ -651,29 +699,29 @@ public class TibetanMachineWeb implements THDLWylieConstants {
// System.out.println("wylie: " + wylie + "; wylie_minus_plusses: " + wylie_minus_plusses);
// }
}
- break;
+ break;
- case 11: //half-height character if there is one
+ case 11: //half-height character if there is one
if (!ignore) {
duffCodes[HALF_C] = new DuffCode(val,true);
}
- break;
+ break;
- case 12: //special bindu-value if vowel+bindu are one glyph
+ case 12: //special bindu-value if vowel+bindu are one glyph
if (!ignore) {
DuffCode binduCode = new DuffCode(val,true);
binduMap.put(duffCodes[TMW],binduCode);
}
- break;
+ break;
case 13:
throw new Error("tibwn.ini has only 13 columns, you tried to use a 14th column.");
- }
- } else {
+ }
+ } else {
if (k == 10) {
throw new Error("needed none or some unicode; line is " + line);
}
}
- }
+ }
if (k < 10) {
throw new Error("needed none or some unicode; line is " + line);
}
@@ -711,14 +759,14 @@ public class TibetanMachineWeb implements THDLWylieConstants {
int code = duffCodes[TMW].getCharNum()-32;
toHashKey[font][code] = wylie;
}
- }
- }
- }
- catch (IOException e) {
- System.out.println("file Disappeared");
+ }
+ }
+ }
+ catch (IOException e) {
+ System.out.println("file Disappeared");
ThdlDebug.noteIffyCode();
- }
- }
+ }
+ }
/**
* (Re-)sets the keyboard.
@@ -728,41 +776,41 @@ public class TibetanMachineWeb implements THDLWylieConstants {
* if there was an error
*/
public static boolean setKeyboard(TibetanKeyboard kb) {
- keyboard = kb;
+ keyboard = kb;
- if (currentKeyboardIsExtendedWylie()) { //wylie keyboard
- hasDisambiguatingKey = true;
- disambiguating_key = WYLIE_DISAMBIGUATING_KEY;
- hasSanskritStackingKey = true;
- hasTibetanStackingKey = false;
- isStackingMedial = true;
- stacking_key = WYLIE_SANSKRIT_STACKING_KEY;
- isAChenRequiredBeforeVowel = false;
- isAChungConsonant = false;
- hasAVowel = true;
- aVowel = WYLIE_aVOWEL;
- if (!vowelSet.contains(WYLIE_aVOWEL)) {
- vowelSet.add(WYLIE_aVOWEL);
+ if (currentKeyboardIsExtendedWylie()) { //wylie keyboard
+ hasDisambiguatingKey = true;
+ disambiguating_key = WYLIE_DISAMBIGUATING_KEY;
+ hasSanskritStackingKey = true;
+ hasTibetanStackingKey = false;
+ isStackingMedial = true;
+ stacking_key = WYLIE_SANSKRIT_STACKING_KEY;
+ isAChenRequiredBeforeVowel = false;
+ isAChungConsonant = false;
+ hasAVowel = true;
+ aVowel = WYLIE_aVOWEL;
+ if (!vowelSet.contains(WYLIE_aVOWEL)) {
+ vowelSet.add(WYLIE_aVOWEL);
validInputSequences.put(WYLIE_aVOWEL, anyOldObjectWillDo);
}
- }
- else {
- hasDisambiguatingKey = keyboard.hasDisambiguatingKey();
- if (hasDisambiguatingKey)
- disambiguating_key = keyboard.getDisambiguatingKey();
+ }
+ else {
+ hasDisambiguatingKey = keyboard.hasDisambiguatingKey();
+ if (hasDisambiguatingKey)
+ disambiguating_key = keyboard.getDisambiguatingKey();
- hasSanskritStackingKey = keyboard.hasSanskritStackingKey();
- hasTibetanStackingKey = keyboard.hasTibetanStackingKey();
- if (hasSanskritStackingKey || hasTibetanStackingKey) {
- isStackingMedial = keyboard.isStackingMedial();
- stacking_key = keyboard.getStackingKey();
- }
+ hasSanskritStackingKey = keyboard.hasSanskritStackingKey();
+ hasTibetanStackingKey = keyboard.hasTibetanStackingKey();
+ if (hasSanskritStackingKey || hasTibetanStackingKey) {
+ isStackingMedial = keyboard.isStackingMedial();
+ stacking_key = keyboard.getStackingKey();
+ }
- isAChenRequiredBeforeVowel = keyboard.isAChenRequiredBeforeVowel();
- isAChungConsonant = keyboard.isAChungConsonant();
- hasAVowel = keyboard.hasAVowel();
- }
- return true;
+ isAChenRequiredBeforeVowel = keyboard.isAChenRequiredBeforeVowel();
+ isAChungConsonant = keyboard.isAChungConsonant();
+ hasAVowel = keyboard.hasAVowel();
+ }
+ return true;
}
/**
@@ -774,18 +822,18 @@ public static boolean setKeyboard(TibetanKeyboard kb) {
* if there was an error
*/
public static boolean setKeyboard(URL url) {
- try {
+ try {
TibetanKeyboard kb = new TibetanKeyboard(url);
- if (setKeyboard(kb))
- return true;
- else
- return false;
- }
- catch (TibetanKeyboard.InvalidKeyboardException ike) {
- System.out.println("can't create the keyboard associated with " + url);
+ if (setKeyboard(kb))
+ return true;
+ else
+ return false;
+ }
+ catch (TibetanKeyboard.InvalidKeyboardException ike) {
+ System.out.println("can't create the keyboard associated with " + url);
ThdlDebug.noteIffyCode();
- return false;
- }
+ return false;
+ }
}
/**
@@ -799,10 +847,10 @@ public static boolean setKeyboard(URL url) {
* a way of encoding the font itself
*/
public static SimpleAttributeSet getAttributeSet(int font) {
- if (font > -1 && font < webFontAttributeSet.length)
- return webFontAttributeSet[font];
- else
- return null;
+ if (font > -1 && font < webFontAttributeSet.length)
+ return webFontAttributeSet[font];
+ else
+ return null;
}
/**
@@ -842,10 +890,10 @@ private static HashMap unicodeAttributeSets = new HashMap();
* a way of encoding the font itself
*/
public static SimpleAttributeSet getAttributeSetTM(int font) {
- if (font > -1 && font < normFontAttributeSet.length)
- return normFontAttributeSet[font];
- else
- return null;
+ if (font > -1 && font < normFontAttributeSet.length)
+ return normFontAttributeSet[font];
+ else
+ return null;
}
/**
@@ -855,17 +903,17 @@ public static SimpleAttributeSet getAttributeSetTM(int font) {
* ENTER), false if not
*/
public static boolean isFormatting(char c) {
- if (c < 32 || c > 126)
- return true;
- else
- return false;
+ if (c < 32 || c > 126)
+ return true;
+ else
+ return false;
/*
- if ( c == KeyEvent.VK_TAB
- || c == KeyEvent.VK_ENTER)
+ if ( c == KeyEvent.VK_TAB
+ || c == KeyEvent.VK_ENTER)
- return true;
- else
- return false;
+ return true;
+ else
+ return false;
*/
}
@@ -878,10 +926,10 @@ public static boolean isFormatting(char c) {
* @return true if s is a character in the current keyboard, false if
* not */
public static boolean isChar(String s) {
- if (currentKeyboardIsExtendedWylie())
- return charSet.contains(s);
- else
- return keyboard.isChar(s);
+ if (currentKeyboardIsExtendedWylie())
+ return charSet.contains(s);
+ else
+ return keyboard.isChar(s);
}
/**
@@ -892,7 +940,7 @@ public static boolean isChar(String s) {
* @return true if s is a character in Extended Wylie transliteration,
* false if not */
public static boolean isWylieChar(String s) {
- return charSet.contains(s);
+ return charSet.contains(s);
}
@@ -904,7 +952,7 @@ public static boolean isWylieChar(String s) {
* @return true if s is such in Extended Wylie transliteration, false
* if not */
public static boolean isWylieTibetanConsonantOrConsonantStack(String s) {
- return tibSet.contains(s);
+ return tibSet.contains(s);
}
/**
@@ -912,7 +960,7 @@ public static boolean isWylieTibetanConsonantOrConsonantStack(String s) {
* Sanskrit multi-consonant stack.
*/
public static boolean isWylieSanskritConsonantStack(String s) {
- return sanskritStackSet.contains(s);
+ return sanskritStackSet.contains(s);
}
/** Returns true if and only if s is the THDL Extended Wylie
@@ -943,7 +991,7 @@ public static boolean isWylieAchungAppendage(String s) {
* @return true if s is a number in Extended Wylie transliteration,
* false if not */
public static boolean isWylieNumber(String s) {
- return numberSet.contains(s);
+ return numberSet.contains(s);
}
/**
@@ -954,10 +1002,10 @@ public static boolean isWylieNumber(String s) {
* keyboard, false if not
*/
public static boolean isPunc(String s) {
- if (currentKeyboardIsExtendedWylie())
- return puncSet.contains(s);
- else
- return keyboard.isPunc(s);
+ if (currentKeyboardIsExtendedWylie())
+ return puncSet.contains(s);
+ else
+ return keyboard.isPunc(s);
}
/**
@@ -968,7 +1016,7 @@ public static boolean isPunc(String s) {
* Extended Wylie transliteration, false if not
*/
public static boolean isWyliePunc(String s) {
- return puncSet.contains(s);
+ return puncSet.contains(s);
}
/**
@@ -979,10 +1027,10 @@ public static boolean isWyliePunc(String s) {
* keyboard, false if not
*/
public static boolean isVowel(String s) {
- if (currentKeyboardIsExtendedWylie())
- return vowelSet.contains(s);
- else
- return keyboard.isVowel(s);
+ if (currentKeyboardIsExtendedWylie())
+ return vowelSet.contains(s);
+ else
+ return keyboard.isVowel(s);
}
/** By example, this returns true for le, lA-i, lA-iM, luM, l-i, etc.,
@@ -1051,7 +1099,7 @@ public static boolean isAmbiguousWylie(String x, String y) {
* Extended Wylie transliteration, false if not
*/
public static boolean isWylieVowel(String s) {
- return vowelSet.contains(s);
+ return vowelSet.contains(s);
}
/** Returns true if and only if wylie is the THDL Extended Wylie for
@@ -1060,7 +1108,7 @@ public static boolean isWylieVowel(String s) {
bindu. Note that an adornment might be both an adornment and a
vowel, or an adornment and punctuation. */
public static boolean isWylieAdornment(String wylie) {
- return (vowelSet.contains(wylie)
+ return (vowelSet.contains(wylie)
|| (wylie.equals("M") /* U+0F7E */
|| wylie.equals("M^") /* U+0F83 */
|| wylie.equals("iM")
@@ -1075,7 +1123,7 @@ public static boolean isWylieAdornment(String wylie) {
an adornment {@link #isWylieAdornment(String)} that contains a
vowel within it. */
public static boolean isWylieAdornmentAndContainsVowel(String wylie) {
- return (isWylieAdornment(wylie) &&
+ return (isWylieAdornment(wylie) &&
!wylie.equals("M") /* U+0F7E */
&& !wylie.equals("M^") /* U+0F83 */);
}
@@ -1089,7 +1137,7 @@ public static boolean isWylieAdornmentAndContainsVowel(String wylie) {
* @return true if s is a possible leftmost character in a Tibetan
* syllable, false if not. */
public static boolean isWylieLeft(String s) {
- return leftSet.contains(s);
+ return leftSet.contains(s);
}
/**
@@ -1101,7 +1149,7 @@ public static boolean isWylieLeft(String s) {
* @return true if s is a possible right character in a Tibetan
* syllable, false if not. */
public static boolean isWylieRight(String s) {
- return rightSet.contains(s);
+ return rightSet.contains(s);
}
/**
@@ -1111,7 +1159,7 @@ public static boolean isWylieRight(String s) {
* @return true if s is a possible postsuffix in a Tibetan
* syllable, false if not. */
public static boolean isWylieFarRight(String s) {
- return farRightSet.contains(s);
+ return farRightSet.contains(s);
}
/**
@@ -1121,7 +1169,7 @@ public static boolean isWylieFarRight(String s) {
* @return true if s is a possible superscribed letter in a Tibetan
* syllable, false if not. */
public static boolean isWylieTop(String s) {
- return topSet.contains(s);
+ return topSet.contains(s);
}
/**
@@ -1134,10 +1182,10 @@ public static boolean isWylieTop(String s) {
* @see TibetanKeyboard
*/
public static String getWylieForChar(String s) {
- if (currentKeyboardIsExtendedWylie())
- return s;
+ if (currentKeyboardIsExtendedWylie())
+ return s;
- return keyboard.getWylieForChar(s);
+ return keyboard.getWylieForChar(s);
}
/** Returns true iff the currently active keyboard is the
@@ -1163,10 +1211,10 @@ public static String getWylieForChar(String s) {
* @see TibetanKeyboard
*/
public static String getWylieForPunc(String s) {
- if (currentKeyboardIsExtendedWylie())
- return s;
+ if (currentKeyboardIsExtendedWylie())
+ return s;
- return keyboard.getWylieForPunc(s);
+ return keyboard.getWylieForPunc(s);
}
/**
@@ -1179,10 +1227,10 @@ public static String getWylieForPunc(String s) {
* @see TibetanKeyboard
*/
public static String getWylieForVowel(String s) {
- if (currentKeyboardIsExtendedWylie())
- return s;
+ if (currentKeyboardIsExtendedWylie())
+ return s;
- return keyboard.getWylieForVowel(s);
+ return keyboard.getWylieForVowel(s);
}
/**
@@ -1198,12 +1246,12 @@ public static String getWylieForVowel(String s) {
* @see DuffCode
* @see TibTextUtils#getVowel(List,DuffCode,DuffCode,String) */
public static DuffCode getVowel(String hashKey, int vowel) {
- DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
-
- if (null == dc)
- return null;
+ DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
+
+ if (null == dc)
+ return null;
- return dc[vowel]; //either a vowel or null
+ return dc[vowel]; //either a vowel or null
}
/**
@@ -1214,10 +1262,10 @@ public static DuffCode getVowel(String hashKey, int vowel) {
* hashKey, false if not
*/
public static boolean hasGlyph(String hashKey) {
- if (tibHash.get(hashKey)==null)
- return false;
- else
- return true;
+ if (tibHash.get(hashKey)==null)
+ return false;
+ else
+ return true;
}
/** Returns the Unicode correspondence for the Wylie wylie, which must
@@ -1233,7 +1281,7 @@ public static String getUnicodeForWylieForGlyph(String wylie) {
* Returns true if and only if hashKey is a known hash key from tibwn.ini.
*/
public static boolean isKnownHashKey(String hashKey) {
- DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
+ DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
return (null != dc);
}
@@ -1246,10 +1294,10 @@ public static boolean isKnownHashKey(String hashKey) {
* @see DuffCode
*/
public static DuffCode getGlyph(String hashKey) {
- DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
+ DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
if (null == dc)
throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
- return dc[TMW];
+ return dc[TMW];
}
/**
@@ -1261,11 +1309,11 @@ public static DuffCode getGlyph(String hashKey) {
* @see DuffCode
*/
public static DuffCode getHalfHeightGlyph(String hashKey) {
- DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
- if (dc == null)
- return null;
+ DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
+ if (dc == null)
+ return null;
- return dc[REDUCED_C];
+ return dc[REDUCED_C];
}
private static final DuffCode TMW_cr = new DuffCode(1, '\r');
@@ -1332,7 +1380,7 @@ public static DuffCode mapTMtoTMW(int font, int ordinal, int suggestedFont) {
if ((0 != suggestedFont) && (32 == ordinal || 45 == ordinal)) {
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
}
- return TMtoTMW[font][ordinal-32];
+ return TMtoTMW[font][ordinal-32];
}
private static final DuffCode TM_cr = new DuffCode(1, '\r');
@@ -1391,7 +1439,7 @@ public static DuffCode mapTMWtoTM(int font, int ordinal, int suggestedFont) {
return new DuffCode(suggestedFont, (char)ordinal); // FIXME: don't create a new one each time; it wastes heap
}
DuffCode ans = TMWtoTM[font][ordinal-32];
- return ans;
+ return ans;
}
/** Tests the TMW->TM and TM->TMW mappings. */
@@ -1616,6 +1664,115 @@ private static final String Unicode_lf = "\n";
private static final String Unicode_tab = "\t";
+ private static final DuffCode[] tmwFor0F00
+ = new DuffCode[] { new DuffCode(1, (char)63), new DuffCode(8, (char)102) };
+ private static final DuffCode[] tmwFor0F02
+ = new DuffCode[] { new DuffCode(1, (char)56), new DuffCode(1, (char)118), new DuffCode(8, (char)95), new DuffCode(8, (char)92) };
+ private static final DuffCode[] tmwFor0F03
+ = new DuffCode[] { new DuffCode(1, (char)56), new DuffCode(1, (char)118), new DuffCode(8, (char)95), new DuffCode(1, (char)105) };
+ private static final DuffCode[] tmwFor0F0E
+ = new DuffCode[] { new DuffCode(1, (char)107), new DuffCode(1, (char)107) };
+ // for 0F40, use the full-height, not the reduced-height, form
+ private static final DuffCode[] tmwFor0F40
+ = new DuffCode[] { new DuffCode(1, (char)92) };
+ private static final DuffCode[] tmwFor0F42
+ = new DuffCode[] { new DuffCode(1, (char)93) };
+ private static final DuffCode[] tmwFor0F49
+ = new DuffCode[] { new DuffCode(1, (char)94) };
+ private static final DuffCode[] tmwFor0F4F
+ = new DuffCode[] { new DuffCode(1, (char)95) };
+ private static final DuffCode[] tmwFor0F51
+ = new DuffCode[] { new DuffCode(1, (char)96) };
+ private static final DuffCode[] tmwFor0F53
+ = new DuffCode[] { new DuffCode(1, (char)97) };
+ private static final DuffCode[] tmwFor0F5E
+ = new DuffCode[] { new DuffCode(1, (char)98) };
+ private static final DuffCode[] tmwFor0F62
+ = new DuffCode[] { new DuffCode(8, (char)66) }; // not the full-form, use \u0F6A for that...
+ private static final DuffCode[] tmwFor0F64
+ = new DuffCode[] { new DuffCode(1, (char)99) };
+ private static final DuffCode[] tmwFor0F67
+ = new DuffCode[] { new DuffCode(1, (char)100) };
+ private static final DuffCode[] tmwFor0F6A
+ = new DuffCode[] { new DuffCode(1, (char)58) };
+ private static final DuffCode[] tmwFor0F73
+ = new DuffCode[] { new DuffCode(4, (char)106), new DuffCode(1, (char)109) };
+ private static final DuffCode[] tmwFor0F76
+ = new DuffCode[] { new DuffCode(8, (char)71), new DuffCode(8, (char)87) };
+ private static final DuffCode[] tmwFor0F77
+ = new DuffCode[] { new DuffCode(8, (char)71), new DuffCode(4, (char)106), new DuffCode(8, (char)87) };
+ private static final DuffCode[] tmwFor0F78
+ = new DuffCode[] { new DuffCode(10, (char)105), new DuffCode(8, (char)87) };
+ private static final DuffCode[] tmwFor0F79
+ = new DuffCode[] { new DuffCode(10, (char)105), new DuffCode(4, (char)106), new DuffCode(8, (char)87) };
+ private static final DuffCode[] tmwFor0F7E
+ = new DuffCode[] { new DuffCode(8, (char)91) }; // the one that lines up better -- i.e., not (8, (char)90)
+ private static final DuffCode[] tmwFor0F81
+ = new DuffCode[] { new DuffCode(4, (char)106), new DuffCode(8, (char)87) };
+
+ /** Returns an array of one, two, three, or four DuffCodes that
+ together represent the Tibetan Unicode character ch. Returns
+ null if there is no mapping for ch, including when ch lies
+ outside the range the lookup table covers. For some codepoints
+ several TMW glyphs are appropriate; an arbitrary one is returned. */
+ public static DuffCode[] mapUnicodeToTMW(char ch) {
+ // FIXME WARN WHENEVER AN ESCAPE IS USED FOR: f71, f72, f73, f74, f75, f76, f77, f78, f79, f7a, f7c, f81
+
+ // For U+0F71, U+0F72, U+0F74, U+0F75, U+0F7A, and U+0F7C,
+ // you'll get one of the possible TMW glyphs, maybe not the
+ // one that is most beautiful.
+
+ if ('\u0F00' == ch) {
+ return tmwFor0F00;
+ } else if ('\u0F02' == ch) {
+ return tmwFor0F02;
+ } else if ('\u0F03' == ch) {
+ return tmwFor0F03;
+ } else if ('\u0F0E' == ch) {
+ return tmwFor0F0E;
+ } else if ('\u0F40' == ch) {
+ return tmwFor0F40;
+ } else if ('\u0F42' == ch) {
+ return tmwFor0F42;
+ } else if ('\u0F49' == ch) {
+ return tmwFor0F49;
+ } else if ('\u0F4F' == ch) {
+ return tmwFor0F4F;
+ } else if ('\u0F51' == ch) {
+ return tmwFor0F51;
+ } else if ('\u0F53' == ch) {
+ return tmwFor0F53;
+ } else if ('\u0F5E' == ch) {
+ return tmwFor0F5E;
+ } else if ('\u0F62' == ch) {
+ return tmwFor0F62;
+ } else if ('\u0F64' == ch) {
+ return tmwFor0F64;
+ } else if ('\u0F67' == ch) {
+ return tmwFor0F67;
+ } else if ('\u0F6A' == ch) {
+ return tmwFor0F6A;
+ } else if ('\u0F73' == ch) {
+ return tmwFor0F73;
+ } else if ('\u0F76' == ch) {
+ return tmwFor0F76;
+ } else if ('\u0F77' == ch) {
+ return tmwFor0F77;
+ } else if ('\u0F78' == ch) {
+ return tmwFor0F78;
+ } else if ('\u0F79' == ch) {
+ return tmwFor0F79;
+ } else if ('\u0F7E' == ch) {
+ return tmwFor0F7E;
+ } else if ('\u0F81' == ch) {
+ return tmwFor0F81;
+ } else {
+ int idx = ch - '\u0F00'; // int arithmetic: negative for any ch below U+0F00
+ DuffCode[] x = (idx < 0 || idx >= UnicodeToTMW.length) ? null : UnicodeToTMW[idx];
+ return (null == x || null == x[0]) ? null : x; // unmapped slots yield null, never an exception
+ }
+ }
+
/** Returns the sequence of Unicode corresponding to the given
TibetanMachineWeb font
(0=TibetanMachineWeb,1=TibetanMachineWeb1,...) and
@@ -1657,11 +1814,11 @@ public static String mapTMWtoUnicode(int font, int ordinal) {
* of the TibetanMachine fonts, otherwise 0 */
public static int getTMFontNumber(String name) {
String internedName = name.intern();
- for (int i=1; i -1)
- return hashKey; //because '+' remains part of Extended Wylie for Sanskrit stacks
+ if (hashKey.indexOf(WYLIE_SANSKRIT_STACKING_KEY) > -1)
+ return hashKey; //because '+' remains part of Extended Wylie for Sanskrit stacks
- if (hashKey.charAt(0) == '-')
- return hashKey; //because must be '-i' or '-I' vowels
+ if (hashKey.charAt(0) == '-')
+ return hashKey; //because must be '-i' or '-I' vowels
- StringTokenizer st = new StringTokenizer(hashKey, "-");
- StringBuffer sb = new StringBuffer();
+ StringTokenizer st = new StringTokenizer(hashKey, "-");
+ StringBuffer sb = new StringBuffer();
- while (st.hasMoreTokens())
- sb.append(st.nextToken());
+ while (st.hasMoreTokens())
+ sb.append(st.nextToken());
- return sb.toString();
+ return sb.toString();
}
// DLC DOC
@@ -1776,12 +1933,12 @@ private static String getTMWToACIPErrorString(DuffCode dc) {
*/
public static String getWylieForGlyph(int font, int code,
boolean noSuchWylie[]) {
- String hashKey = getHashKeyForGlyph(font, code);
+ String hashKey = getHashKeyForGlyph(font, code);
if (hashKey == null) {
noSuchWylie[0] = true;
return getTMWToWylieErrorString(new DuffCode(font, (char)code));
}
- return wylieForGlyph(hashKey);
+ return wylieForGlyph(hashKey);
}
/**
@@ -1794,12 +1951,12 @@ private static String getTMWToACIPErrorString(DuffCode dc) {
* @return the Wylie value corresponding to the
* glyph denoted by dc */
public static String getWylieForGlyph(DuffCode dc, boolean noSuchWylie[]) {
- String hashKey = getHashKeyForGlyph(dc);
+ String hashKey = getHashKeyForGlyph(dc);
if (hashKey == null) {
noSuchWylie[0] = true;
return getTMWToWylieErrorString(dc);
}
- return wylieForGlyph(hashKey);
+ return wylieForGlyph(hashKey);
}
// DLC DOC
@@ -1837,11 +1994,11 @@ public static String getACIPForGlyph(DuffCode dc, boolean noSuchACIP[]) {
* false if not
*/
public static boolean isSanskritStack(int font, int code) {
- String val = toHashKey[font][code];
- if (val.indexOf(WYLIE_SANSKRIT_STACKING_KEY) == -1)
- return false;
- else
- return true;
+ String val = toHashKey[font][code];
+ if (val.indexOf(WYLIE_SANSKRIT_STACKING_KEY) == -1)
+ return false;
+ else
+ return true;
}
/**
@@ -1851,13 +2008,13 @@ public static boolean isSanskritStack(int font, int code) {
* false if not
*/
public static boolean isSanskritStack(DuffCode dc) {
- int font = dc.getFontNum();
- int code = dc.getCharNum()-32;
+ int font = dc.getFontNum();
+ int code = dc.getCharNum()-32;
- if (isSanskritStack(font, code))
- return true;
- else
- return false;
+ if (isSanskritStack(font, code))
+ return true;
+ else
+ return false;
}
/**
@@ -1868,11 +2025,11 @@ public static boolean isSanskritStack(DuffCode dc) {
* false if not
*/
public static boolean isStack(int font, int code) {
- String val = toHashKey[font][code];
- if (val.indexOf('-') < 1) //we allow '-i' and '-I' in as vowels
- return false;
- else
- return true;
+ String val = toHashKey[font][code];
+ if (val.indexOf('-') < 1) //we allow '-i' and '-I' in as vowels
+ return false;
+ else
+ return true;
}
/**
@@ -1882,10 +2039,10 @@ public static boolean isStack(int font, int code) {
* false if not
*/
public static boolean isStack(DuffCode dc) {
- int font = dc.getFontNum();
- int code = dc.getCharNum()-32;
+ int font = dc.getFontNum();
+ int code = dc.getCharNum()-32;
- return isStack(font, code);
+ return isStack(font, code);
}
/**
@@ -1895,7 +2052,7 @@ public static boolean isStack(DuffCode dc) {
* DuffCode for that key
*/
public static Map getTibHash() {
- return tibHash;
+ return tibHash;
}
/**
@@ -1906,7 +2063,7 @@ public static Map getTibHash() {
* such vowel glyph
*/
public static Map getBinduMap() {
- return binduMap;
+ return binduMap;
}
/**
@@ -1915,7 +2072,7 @@ public static Map getBinduMap() {
* false if not
* @see TibetanKeyboard */
public static boolean hasDisambiguatingKey() {
- return hasDisambiguatingKey;
+ return hasDisambiguatingKey;
}
/**
@@ -1925,7 +2082,7 @@ public static boolean hasDisambiguatingKey() {
* @see TibetanKeyboard
*/
public static char getDisambiguatingKey() {
- return disambiguating_key;
+ return disambiguating_key;
}
/**
@@ -1934,7 +2091,7 @@ public static char getDisambiguatingKey() {
* false if not
* @see TibetanKeyboard */
public static boolean hasSanskritStackingKey() {
- return hasSanskritStackingKey;
+ return hasSanskritStackingKey;
}
/**
@@ -1943,7 +2100,7 @@ public static boolean hasSanskritStackingKey() {
* false if not
* @see TibetanKeyboard */
public static boolean hasTibetanStackingKey() {
- return hasTibetanStackingKey;
+ return hasTibetanStackingKey;
}
/**
@@ -1952,7 +2109,7 @@ public static boolean hasTibetanStackingKey() {
* there is no stacking key
* @see TibetanKeyboard */
public static boolean isStackingMedial() {
- return isStackingMedial;
+ return isStackingMedial;
}
/**
@@ -1962,7 +2119,7 @@ public static boolean isStackingMedial() {
* @see TibetanKeyboard
*/
public static char getStackingKey() {
- return stacking_key;
+ return stacking_key;
}
/**
@@ -1972,7 +2129,7 @@ public static char getStackingKey() {
* in Wylie)
* @see TibetanKeyboard */
public static boolean isAChenRequiredBeforeVowel() {
- return isAChenRequiredBeforeVowel;
+ return isAChenRequiredBeforeVowel;
}
/**
@@ -1981,7 +2138,7 @@ public static boolean isAChenRequiredBeforeVowel() {
* of stacking, false if not (as in Wylie)
* @see TibetanKeyboard */
public static boolean isAChungConsonant() {
- return isAChungConsonant;
+ return isAChungConsonant;
}
/**
@@ -1991,7 +2148,7 @@ public static boolean isAChungConsonant() {
* not
* @see TibetanKeyboard */
public static boolean hasAVowel() {
- return hasAVowel;
+ return hasAVowel;
}
/**
@@ -2001,7 +2158,7 @@ public static boolean hasAVowel() {
* @see TibetanKeyboard
*/
public static String getAVowel() {
- return aVowel;
+ return aVowel;
}
/**
@@ -2010,13 +2167,13 @@ public static String getAVowel() {
* @return true if the glyph is a top-hanging (superscript) vowel (i,
* u, e, o, ai, or ao) and false if not */
public static boolean isTopVowel(DuffCode dc) {
- String wylie
+ String wylie
= getWylieForGlyph(dc,
TibTextUtils.weDoNotCareIfThereIsCorrespondingWylieOrNot);
- if (top_vowels.contains(wylie))
- return true;
+ if (top_vowels.contains(wylie))
+ return true;
- return false;
+ return false;
}
/** Returns true if and only if ch, which is an ASCII character