Jskad supports <7, >8, etc. again; it no longer supports the punctuation

'<' and '>'. The current keyboard implementation makes this an either-or proposition, when fundamentally it need not be. Added a <?Numbers?> command and an <?Input:Numbers?> command to tibwn.ini; broke the numbers apart from the consonants. This facilitates the new-and-improved Tibetan->Wylie conversion. Tibetan->Wylie is now done by forming legal tsheg-bars. A legal tsheg-bar is converted into perfect THDL Wylie. See code comments to learn what it thinks is a legal tsheg-bar, but it includes, e.g., bskyUMbsH minus the trailing punctuation (H). Illegal sequences, such as runs of transliterated Sanskrit, are turned into unambiguous Wylie; each glyph is followed by a vowel or a disambiguator ('.'). I've made it so that the illegal sequences are as beautiful as possible. You get 'pad+me', for example, not the equivalent but uglier 'pad+m.e.'.
2003-07-08 14:30:17 +00:00 · 2003-07-08 14:30:17 +00:00 · 02558a1d78
commit 02558a1d78
parent c04a3f189b
6 changed files with 878 additions and 479 deletions
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -60,6 +60,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {

 	private static TibetanKeyboard keyboard = null;
 	private static Set charSet = null;
+	private static Set tibSet = null;
+	private static Set sanskritStackSet = null;
+	private static Set numberSet = null;
 	private static Set vowelSet = null;
 	private static Set puncSet = null;
 	private static Set topSet = null;
@ -346,26 +349,64 @@ public class TibetanMachineWeb implements THDLWylieConstants {
            }
 			String line;
 			boolean hashOn = false;
-			boolean isSanskrit = false; //FIXME: this is never read.
+
+            // is this a Tibetan consonant or consonant stack?
+			boolean isTibetan = false;
+
+            // is this a Sanskrit consonant stack?
+			boolean isSanskrit = false;
+
 			boolean ignore = false;

+            tibSet = new HashSet();
+            sanskritStackSet = new HashSet();
+
 			while ((line = in.readLine()) != null) {
 				if (line.startsWith("<?")) { //line is command
 					if (line.equalsIgnoreCase("<?Consonants?>")) {
 						isSanskrit = false;
+						isTibetan = true;
 						hashOn = false;
+                        ignore = false;
 						line = in.readLine();
-						charSet = new HashSet();
+						if (null == charSet) charSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
 						while (st.hasMoreTokens()) {
                            String ntk;
 							charSet.add(ntk = st.nextToken());
+							tibSet.add(ntk);
                            validInputSequences.put(ntk, anyOldObjectWillDo);
                        }
 					}
+					else if (line.equalsIgnoreCase("<?Numbers?>")) {
+                        // FIXME: for historical reasons, numbers go
+                        // in both charSet and numberSet.
+						isSanskrit = false;
+						isTibetan = false;
+						hashOn = false;
+                        ignore = false;
+						line = in.readLine();
+                        if (null == charSet) charSet = new HashSet();
+						numberSet = new HashSet();
+						StringTokenizer st = new StringTokenizer(line,",");
+						while (st.hasMoreTokens()) {
+                            String ntk;
+                            // DLC FIXME: don't add it to numberSet
+                            // and charSet here; do it in
+                            // <?Input:Numbers?> so that Jskad has the
+                            // same TMW->Wylie conversion regardless
+                            // of whether or not it chooses to support
+                            // inputting numbers.
+							numberSet.add(ntk = st.nextToken());
+							charSet.add(ntk);
+                            validInputSequences.put(ntk, anyOldObjectWillDo);
+                        }
+                    }
 					else if (line.equalsIgnoreCase("<?Vowels?>")) {
 						isSanskrit = false;
+						isTibetan = false;
 						hashOn = false;
+                        ignore = false;
 						line = in.readLine();
 						vowelSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
@ -377,7 +418,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 					}
 					else if (line.equalsIgnoreCase("<?Other?>")) {
 						isSanskrit = false;
+						isTibetan = false;
 						hashOn = false;
+                        ignore = false;
 						line = in.readLine();
 						puncSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
@ -389,29 +432,47 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 					}

 					else if (line.equalsIgnoreCase("<?Input:Punctuation?>")
-						|| line.equalsIgnoreCase("<?Input:Vowels?>")
-						|| line.equalsIgnoreCase("<?Input:Tibetan?>")) {
+                             || line.equalsIgnoreCase("<?Input:Vowels?>")) {
 						isSanskrit = false;
+						isTibetan = false;
+						hashOn = true;
+						ignore = false;
+                    }
+                    else if (line.equalsIgnoreCase("<?Input:Tibetan?>")) {
+						isSanskrit = false;
+						isTibetan = true;
+						hashOn = true;
+						ignore = false;
+					}
+                    else if (line.equalsIgnoreCase("<?Input:Numbers?>")) {
+						isSanskrit = false;
+						isTibetan = false;
 						hashOn = true;
 						ignore = false;
 					}
 					else if (line.equalsIgnoreCase("<?Input:Sanskrit?>")) {
 						isSanskrit = true;
+						isTibetan = false;
 						hashOn = true;
 						ignore = false;
 					}
 					else if (line.equalsIgnoreCase("<?ToWylie?>")) {
 						isSanskrit = false;
+						isTibetan = false;
 						hashOn = false;
 						ignore = false;
 					}
-					else if (line.equalsIgnoreCase("<?Ignore?>"))
+					else if (line.equalsIgnoreCase("<?Ignore?>")) {
+						isSanskrit = false;
 						ignore = true;
+                    }
 				}
-				else if (line.startsWith("//")) //comment
+				else if (line.startsWith("//")) { //comment
 					;
-				else if (line.equals("")) //empty string
+                }
+				else if (line.equals("")) {//empty string
 					;
+                }
 				else {
 					StringTokenizer st = new StringTokenizer(line,DELIMITER,true);

@ -559,6 +620,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
                        if (hashOn) {
                            tibHash.put(wylie, duffCodes);
                        }
+                        if (isTibetan) {
+                            // Delete the dashes:
+                            StringBuffer wylieWithoutDashes = new StringBuffer(wylie);
+                            for (int wl = 0; wl < wylieWithoutDashes.length(); wl++) {
+                                if (wylieWithoutDashes.charAt(wl) == '-') {
+                                    wylieWithoutDashes.deleteCharAt(wl);
+                                    --wl;
+                                }
+                            }
+                            tibSet.add(wylieWithoutDashes.toString());
+                        }
+
+                        if (isSanskrit) {
+                            sanskritStackSet.add(wylie);
+                        }

                        if (null == duffCodes[TMW])
                            throw new Error(fileName
@ -726,13 +802,13 @@ public static boolean isFormatting(char c) {
 }

 /**
-* Checks to see if the passed string
-* is a character in the installed keyboard.
+* Checks to see if the passed string is a character (a single
+* [possibly Sanskrit or va or fa] consonant or a number [possibly
+* super- or subscribed]) in the installed keyboard.
 *
 * @param s the string you want to check
-* @return true if s is a character in the current keyboard,
-* false if not
-*/
+* @return true if s is a character in the current keyboard, false if
+* not */
 public static boolean isChar(String s) {
 	if (currentKeyboardIsExtendedWylie())
 		return charSet.contains(s);
@ -741,16 +817,58 @@ public static boolean isChar(String s) {
 }

 /**
-* Checks to see if the passed string
-* is a character in Extended Wylie.
+* Checks to see if the passed string is a character (a single
+* [possibly Sanskrit or va or fa] consonant or a number [possibly
+* super- or subscribed]) in Extended Wylie.
 * @param s the string to be checked
-* @return true if s is a character in
-* Extended Wylie transliteration, false if not
-*/
+* @return true if s is a character in Extended Wylie transliteration,
+* false if not */
 public static boolean isWylieChar(String s) {
 	return charSet.contains(s);
 }

+
+/**
+* Checks to see if the passed string is a Tibetan consonant or an
+* unadorned consonant stack in Extended Wylie (a member of tibSet).
+* @param s the string to be checked
+* @return true if s is a Tibetan consonant or unadorned consonant
+* stack in Extended Wylie transliteration, false if not */
+public static boolean isWylieTibetanConsonantOrConsonantStack(String s) {
+	return tibSet.contains(s);
+}
+
+/**
+* Returns true if and only if s is the THDL Extended Wylie for a
+* Sanskrit multi-consonant stack (a member of sanskritStackSet).
+* @param s the (Wylie) string to be checked */
+public static boolean isWylieSanskritConsonantStack(String s) {
+	return sanskritStackSet.contains(s);
+}
+
+/** Returns true if and only if s is the THDL Extended Wylie
+    representation of a legal tsheg-bar appendage: 'i, 'e, 'u, 'o,
+    'am, or 'ang.  For example, the word le'u (chapter) contains the
+    appendage 'u. */
+public static boolean isWylieAchungAppendage(String s) {
+    return (s.equals("'e")
+            || s.equals("'i")
+            || s.equals("'o")
+            || s.equals("'u")
+            || s.equals("'ang")
+            || s.equals("'am"));
+}
+
+/**
+* Checks to see if the passed string is a number [possibly super- or
+* subscribed] in Extended Wylie.
+* @param s the string to be checked
+* @return true if s is a number in Extended Wylie transliteration,
+* false if not */
+public static boolean isWylieNumber(String s) {
+	return numberSet.contains(s);
+}
+
 /**
 * Checks to see if the passed string
 * is punctuation in the installed keyboard.
@ -826,6 +944,32 @@ public static boolean isWylieVowel(String s) {
 	return vowelSet.contains(s);
 }

+/** Returns true if and only if wylie is the THDL Extended Wylie for
+    an adornment.  An adornment is something that is part of a stack
+    but is not a consonant, such as a Tibetan or Sanskrit vowel or a
+    bindu.  Note that an adornment may also count as a vowel or as
+    punctuation. */
+public static boolean isWylieAdornment(String wylie) {
+	return (vowelSet.contains(wylie)
+            || (wylie.equals("M") /* U+0F7E */
+                || wylie.equals("M^") /* U+0F83 */
+                || wylie.equals("iM")
+                || wylie.equals("-iM")
+                || wylie.equals("eM")
+                || wylie.equals("aiM")
+                || wylie.equals("oM")
+                || wylie.equals("auM")));
+}
+
+/** Returns true if and only if wylie is the THDL Extended Wylie for
+    an adornment (see {@link #isWylieAdornment(String)}) that contains
+    a vowel within it, i.e. any adornment other than "M" or "M^". */
+public static boolean isWylieAdornmentAndContainsVowel(String wylie) {
+	return (isWylieAdornment(wylie) &&
+            !wylie.equals("M") /* U+0F7E */
+            && !wylie.equals("M^") /* U+0F83 */);
+}
+
 /**
 * Returns true iff this Wylie is valid as a leftmost character in a
 * Tibetan syllable.  For example, in the syllable 'brgyad', 'b' is the
@ -839,9 +983,9 @@ public static boolean isWylieLeft(String s) {
 }

 /**
-* Returns true iff this Wylie is valid as a right (post-vowel)
-* character in a Tibetan syllable.  For example, in the syllable
-* 'lags', 'g' is in the right character position. Valid right
+* Returns true iff this Wylie is valid as a suffix (i.e., a right
+* (post-vowel) character) in a Tibetan syllable.  For example, in the
+* syllable 'lags', 'g' is in the right character position. Valid right
 * characters include g, ng, d, n, b, m, r, l, s, ', and T.
 * @param s the (Wylie) string to be checked
 * @return true if s is a possible right character in a Tibetan