Jskad supports <7, >8, etc. again; it no longer supports the punctuation

'<' and '>'. The current keyboard implementation makes this an either-or proposition, when fundamentally it need not be. Added a <?Numbers?> command and an <?Input:Numbers?> command to tibwn.ini; broke the numbers apart from the consonants. This facilitates the new-and-improved Tibetan->Wylie conversion. Tibetan->Wylie is now done by forming legal tsheg-bars. A legal tsheg-bar is converted into perfect THDL Wylie. See code comments to learn what it thinks is a legal tsheg-bar, but it includes, for example, bskyUMbsH minus the trailing punctuation (H). Illegal sequences, such as runs of transliterated Sanskrit, are turned into unambiguous Wylie; each glyph is followed by a vowel or a disambiguator ('.'). I've made it so that the illegal sequences are as beautiful as possible. You get 'pad+me', for example, not the equivalent but uglier 'pad+m.e.'.
2003-07-08 14:30:17 +00:00 · 2003-07-08 14:30:17 +00:00 · 02558a1d78
commit 02558a1d78
parent c04a3f189b
6 changed files with 878 additions and 479 deletions
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@ -42,7 +42,7 @@ public class DuffPaneTest extends TestCase {
        // We don't want to load the TM or TMW font files ourselves:
        ThdlOptions.setUserPreference("thdl.rely.on.system.tmw.fonts", true);
-        ThdlOptions.setUserPreference("thdl.do.not.rely.on.system.tm.fonts", false);
+        ThdlOptions.setUserPreference("thdl.rely.on.system.tm.fonts", true);
        ThdlOptions.setUserPreference("thdl.debug", true);
        dp = new DuffPane();
@ -102,11 +102,23 @@ public class DuffPaneTest extends TestCase {
     *  and then converting the result to Extended Wylie. */
    public void testWylieToIRToWylie() {
        ensureKeysGiveCorrectWylie("kue ");
        ensureKeysGiveCorrectWylie("<8<7<0 ");
        ensureKeysGiveCorrectWylie("012345678901234 ");
        ensureKeysGiveCorrectWylie("ka<7 ",
                                   "ka<7. ");
        ensureKeysGiveCorrectWylie("ka <7 ");
        ensureKeysGiveCorrectWylie("ka>7 ",
                                   "ka>7. ");
        ensureKeysGiveCorrectWylie("ka >7 ");
 // DLC FIXME : M^ doesn't work.  nga, na do, k,kh do, why not M, M^?
        ensureKeysGiveCorrectWylie("kuau ");
        ensureKeysGiveCorrectWylie("ku-i ");
        ensureKeysGiveCorrectWylie("kuai ");
        ensureKeysGiveCorrectWylie("cuig ");
-        ensureKeysGiveCorrectWylie("kcuig ");
+        ensureKeysGiveCorrectWylie("kcuig ",
                                   "kacuiga ");
        ensureKeysGiveCorrectWylie("gcuig ");
        ensureKeysGiveCorrectWylie("gcuigs'e'i'i'o'am'ang'e'o'u'am'am ");
        ensureKeysGiveCorrectWylie("nga ");
        ensureKeysGiveCorrectWylie("nga /");
@ -144,17 +156,20 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("blar.d");
        ensureKeysGiveCorrectWylie("blarad",
                                   "blar.d");
-        ensureKeysGiveCorrectWylie("b.lard");
+        ensureKeysGiveCorrectWylie("b.lard",
                                   "balarda");
        ensureKeysGiveCorrectWylie("b.lal.d");
        ensureKeysGiveCorrectWylie("blald",
                                   "blalda");
-        ensureKeysGiveCorrectWylie("b.lald");
+        ensureKeysGiveCorrectWylie("b.lald",
                                   "balalda");
        ensureKeysGiveCorrectWylie("b.las.d");
        ensureKeysGiveCorrectWylie("blasd",
                                   "blasda");
-        ensureKeysGiveCorrectWylie("b.lasd");
+        ensureKeysGiveCorrectWylie("b.lasd",
                                   "balasda");
        ensureKeysGiveCorrectWylie("b.lag");
        ensureKeysGiveCorrectWylie("blg",
@ -233,7 +248,7 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("b.lags");
        ensureKeysGiveCorrectWylie("blags");
-        // DLC add b-r-g-s, b-l-g-s,
+        // DLC FIXME: add b-r-g-s, b-l-g-s, etc.
        ensureKeysGiveCorrectWylie("mngas",
@ -268,15 +283,43 @@ public class DuffPaneTest extends TestCase {
            ensureKeysGiveCorrectWylie("skalazasa");
            ensureKeysGiveCorrectWylie("jskad",
                                       "jaskada");
        }
        {
            // These are incorrectly handled in terms of
            // makeIllegalTibetanGoEndToEnd.  DLC FIXME.
            ensureKeysGiveCorrectWylie("jeskad",
-                                       "jeskd");
+                                       "jeskada");
-            ensureKeysGiveCorrectWylie("jeskd");
+            ensureKeysGiveCorrectWylie("jeskd",
                                       "jesakada");
            ensureKeysGiveCorrectWylie("jesakada",
-                                       "jeskd");
+                                       "jesakada");
        }
        {
            // DLC FIXME: ai gives a.ai, a.i is required to get ai.
            // DLC FIXME: haaa doesn't get you h.a., neither does
            // ha.a; achen is tough to get.
        }
        ensureKeysGiveCorrectWylie("heM hiM h-iM heM haiM hoM hauM hUM ");
        ensureKeysGiveCorrectWylie("hi.M ho.M he.M hu.M",
                                   "hiM hoM heM huM");
        ensureKeysGiveCorrectWylie("brgwU-imd");
        ensureKeysGiveCorrectWylie("pad+me");
        ensureKeysGiveCorrectWylie("pad+men+b+h+yuM");
        ensureKeysGiveCorrectWylie("bskyUMbs");
        ensureKeysGiveCorrectWylie("bskyUMbsHgro ");
        ensureKeysGiveCorrectWylie("favakakhagangacachajanyatathadanapaphabamatsatshadzawazhaza'ayaralashasahaTaThaDaNaSha");
        ensureKeysGiveCorrectWylie("fevekekhegengecechejenyetethedenepephebemetsetshedzewezheze'eyerelesheseheTeTheDeNeShe");
        ensureKeysGiveCorrectWylie("fuvukukhugungucuchujunyututhudunupuphubumutsutshudzuwuzhuzu'uyurulushusuhuTuThuDuNuShu");
        ensureKeysGiveCorrectWylie("fovokokhogongocochojonyotothodonopophobomotsotshodzowozhozo'oyoroloshosohoToThoDoNoSho");
        ensureKeysGiveCorrectWylie("faivaikaikhaigaingaicaichaijainyaitaithaidainaipaiphaibaimaitsaitshaidzaiwaizhaizai'aiyairailaishaisaihaiTaiThaiDaiNaiShai");
        ensureKeysGiveCorrectWylie("fauvaukaukhaugaungaucauchaujaunyautauthaudaunaupauphaubaumautsautshaudzauwauzhauzau'auyauraulaushausauhauTauThauDauNauShau");
        ensureKeysGiveCorrectWylie("fivikikhigingicichijinyitithidinipiphibimitsitshidziwizhizi'iyirilishisihiTiThiDiNiShi");
        ensureKeysGiveCorrectWylie("don't touch my coffee/that makes me very angry/supersize my drink",
                                   "dona'ata tocha mya cofafe/thata mkes me veraya angaraya/superasize mya drinaka");
    }
 }
--- a/source/org/thdl/tib/text/DuffCode.java
+++ b/source/org/thdl/tib/text/DuffCode.java
@ -152,7 +152,11 @@ public final class DuffCode {
 /**
 * @return a string representation of this object */
 	public String toString() {
-		return "<duffcode font=" + fontNum
+        boolean[] err = new boolean[] { false };
        String wylie = TibetanMachineWeb.getWylieForGlyph(this, err);
        if (err[0]) wylie = "undefined";
 		return "<duffcode wylie="
            + wylie + " font=" + fontNum
            + " charNum=" + charNum + " character="
            + new Character(getCharacter()).toString() + "/>";
 	}
@ -160,7 +164,11 @@ public final class DuffCode {
 * @param TMW if this DuffCode represents a TMW glyph, not a TM glyph
 * @return a string representation of this object */
 	public String toString(boolean TMW) {
-		return "<duffcode font="
+        boolean[] err = new boolean[] { false };
        String wylie = TibetanMachineWeb.getWylieForGlyph(this, err);
        if (err[0]) wylie = "undefined";
 		return "<duffcode wylie="
            + wylie + " font="
            + (TMW
               ? TibetanMachineWeb.tmwFontNames
               : TibetanMachineWeb.tmFontNames)[fontNum]
--- a/source/org/thdl/tib/text/THDLWylieConstants.java
+++ b/source/org/thdl/tib/text/THDLWylieConstants.java
@ -41,6 +41,12 @@ public interface THDLWylieConstants {
 * the Wylie disambiguating key, as a char
 */
 	public static final char WYLIE_DISAMBIGUATING_KEY = '.';
 /**
 * the Wylie disambiguating key, as a String
 */
 	public static final String WYLIE_DISAMBIGUATING_KEY_STRING
        = new String(new char[] { WYLIE_DISAMBIGUATING_KEY });
 /**
 * the Wylie for the invisible 'a' vowel
 */
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -60,6 +60,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 	private static TibetanKeyboard keyboard = null;
 	private static Set charSet = null;
 	private static Set tibSet = null;
 	private static Set sanskritStackSet = null;
 	private static Set numberSet = null;
 	private static Set vowelSet = null;
 	private static Set puncSet = null;
 	private static Set topSet = null;
@ -346,26 +349,64 @@ public class TibetanMachineWeb implements THDLWylieConstants {
            }
 			String line;
 			boolean hashOn = false;
-			boolean isSanskrit = false; //FIXME: this is never read.
+
            // is this a Tibetan consonant or consonant stack?
 			boolean isTibetan = false;
            // is this a Sanskrit consonant stack?
 			boolean isSanskrit = false;
 			boolean ignore = false;
            tibSet = new HashSet();
            sanskritStackSet = new HashSet();
 			while ((line = in.readLine()) != null) {
 				if (line.startsWith("<?")) { //line is command
 					if (line.equalsIgnoreCase("<?Consonants?>")) {
 						isSanskrit = false;
 						isTibetan = true;
 						hashOn = false;
                        ignore = false;
 						line = in.readLine();
-						charSet = new HashSet();
+						if (null == charSet) charSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
 						while (st.hasMoreTokens()) {
                            String ntk;
 							charSet.add(ntk = st.nextToken());
 							tibSet.add(ntk);
                            validInputSequences.put(ntk, anyOldObjectWillDo);
                        }
 					}
 					else if (line.equalsIgnoreCase("<?Numbers?>")) {
                        // FIXME: for historical reasons, numbers go
                        // in both charSet and numberSet.
 						isSanskrit = false;
 						isTibetan = false;
 						hashOn = false;
                        ignore = false;
 						line = in.readLine();
                        if (null == charSet) charSet = new HashSet();
 						numberSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
 						while (st.hasMoreTokens()) {
                            String ntk;
                            // DLC FIXME: don't add it to numberSet
                            // and charSet here; do it in
                            // <?Input:Numbers?> so that Jskad has the
                            // same TMW->Wylie conversion regardless
                            // of whether or not it chooses to support
                            // inputting numbers.
 							numberSet.add(ntk = st.nextToken());
 							charSet.add(ntk);
                            validInputSequences.put(ntk, anyOldObjectWillDo);
                        }
                    }
 					else if (line.equalsIgnoreCase("<?Vowels?>")) {
 						isSanskrit = false;
 						isTibetan = false;
 						hashOn = false;
                        ignore = false;
 						line = in.readLine();
 						vowelSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
@ -377,7 +418,9 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 					}
 					else if (line.equalsIgnoreCase("<?Other?>")) {
 						isSanskrit = false;
 						isTibetan = false;
 						hashOn = false;
                        ignore = false;
 						line = in.readLine();
 						puncSet = new HashSet();
 						StringTokenizer st = new StringTokenizer(line,",");
@ -389,29 +432,47 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 					}
 					else if (line.equalsIgnoreCase("<?Input:Punctuation?>")
-						|| line.equalsIgnoreCase("<?Input:Vowels?>")
+                             || line.equalsIgnoreCase("<?Input:Vowels?>")) {
 						|| line.equalsIgnoreCase("<?Input:Tibetan?>")) {
 						isSanskrit = false;
 						isTibetan = false;
 						hashOn = true;
 						ignore = false;
                    }
                    else if (line.equalsIgnoreCase("<?Input:Tibetan?>")) {
 						isSanskrit = false;
 						isTibetan = true;
 						hashOn = true;
 						ignore = false;
 					}
                    else if (line.equalsIgnoreCase("<?Input:Numbers?>")) {
 						isSanskrit = false;
 						isTibetan = false;
 						hashOn = true;
 						ignore = false;
 					}
 					else if (line.equalsIgnoreCase("<?Input:Sanskrit?>")) {
 						isSanskrit = true;
 						isTibetan = false;
 						hashOn = true;
 						ignore = false;
 					}
 					else if (line.equalsIgnoreCase("<?ToWylie?>")) {
 						isSanskrit = false;
 						isTibetan = false;
 						hashOn = false;
 						ignore = false;
 					}
-					else if (line.equalsIgnoreCase("<?Ignore?>"))
+					else if (line.equalsIgnoreCase("<?Ignore?>")) {
 						isSanskrit = false;
 						ignore = true;
                    }
-				else if (line.startsWith("//")) //comment
+				}
 				else if (line.startsWith("//")) { //comment
 					;
-				else if (line.equals("")) //empty string
+                }
 				else if (line.equals("")) {//empty string
 					;
                }
 				else {
 					StringTokenizer st = new StringTokenizer(line,DELIMITER,true);
@ -559,6 +620,21 @@ public class TibetanMachineWeb implements THDLWylieConstants {
                        if (hashOn) {
                            tibHash.put(wylie, duffCodes);
                        }
                        if (isTibetan) {
                            // Delete the dashes:
                            StringBuffer wylieWithoutDashes = new StringBuffer(wylie);
                            for (int wl = 0; wl < wylieWithoutDashes.length(); wl++) {
                                if (wylieWithoutDashes.charAt(wl) == '-') {
                                    wylieWithoutDashes.deleteCharAt(wl);
                                    --wl;
                                }
                            }
                            tibSet.add(wylieWithoutDashes.toString());
                        }
                        if (isSanskrit) {
                            sanskritStackSet.add(wylie);
                        }
                        if (null == duffCodes[TMW])
                            throw new Error(fileName
@ -726,13 +802,13 @@ public static boolean isFormatting(char c) {
 }
 /**
-* Checks to see if the passed string
+* Checks to see if the passed string is a character (a single
-* is a character in the installed keyboard.
+* [possibly Sanskrit or va or fa] consonant or a number [possibly
 * super- or subscribed]) in the installed keyboard.
 *
 * @param s the string you want to check
-* @return true if s is a character in the current keyboard,
+* @return true if s is a character in the current keyboard, false if
-* false if not
+* not */
 */
 public static boolean isChar(String s) {
 	if (currentKeyboardIsExtendedWylie())
 		return charSet.contains(s);
@ -741,16 +817,58 @@ public static boolean isChar(String s) {
 }
 /**
-* Checks to see if the passed string
+* Checks to see if the passed string is a character (a single
-* is a character in Extended Wylie.
+* [possibly Sanskrit or va or fa] consonant or a number [possibly
 * super- or subscribed]) in Extended Wylie.
 * @param s the string to be checked
-* @return true if s is a character in
+* @return true if s is a character in Extended Wylie transliteration,
-* Extended Wylie transliteration, false if not
+* false if not */
 */
 public static boolean isWylieChar(String s) {
 	return charSet.contains(s);
 }
 /**
 * Tells whether the given string is a consonant, or a consonant stack
 * without any adornment, in Extended Wylie.  The answer is a plain
 * membership test against <code>tibSet</code>.
 *
 * @param s the string to be checked
 * @return true if s is a member of <code>tibSet</code>, false if not
 */
 public static boolean isWylieTibetanConsonantOrConsonantStack(String s) {
     final boolean knownConsonantOrStack = tibSet.contains(s);
     return knownConsonantOrStack;
 }
 /**
 * Tells whether the given string is the THDL Extended Wylie for a
 * Sanskrit multi-consonant stack.  The answer is a plain membership
 * test against <code>sanskritStackSet</code>.
 *
 * @param s the string to be checked
 * @return true if and only if s is a member of
 * <code>sanskritStackSet</code>
 */
 public static boolean isWylieSanskritConsonantStack(String s) {
     final boolean knownSanskritStack = sanskritStackSet.contains(s);
     return knownSanskritStack;
 }
 /**
 * Tells whether s is the THDL Extended Wylie representation of a
 * legal tsheg-bar appendage, i.e. exactly one of 'i, 'e, 'u, 'o, 'am,
 * or 'ang.  The word le'u (chapter), for example, contains such an
 * appendage.
 *
 * @param s the Wylie string to be checked; must not be null
 * @return true if and only if s equals one of the six appendages
 */
 public static boolean isWylieAchungAppendage(String s) {
     final String[] appendages = { "'e", "'i", "'o", "'u", "'ang", "'am" };
     for (int i = 0; i < appendages.length; i++) {
         // Call equals on s itself so that a null argument fails exactly
         // as it would with a direct chain of s.equals(...) tests.
         if (s.equals(appendages[i])) {
             return true;
         }
     }
     return false;
 }
 /**
 * Tells whether the given string is a number (possibly super- or
 * subscribed) in Extended Wylie.  The answer is a plain membership
 * test against <code>numberSet</code>.
 *
 * @param s the string to be checked
 * @return true if s is a member of <code>numberSet</code>, false if
 * not
 */
 public static boolean isWylieNumber(String s) {
     final boolean knownNumber = numberSet.contains(s);
     return knownNumber;
 }
 /**
 * Checks to see if the passed string
 * is punctuation in the installed keyboard.
@ -826,6 +944,32 @@ public static boolean isWylieVowel(String s) {
 	return vowelSet.contains(s);
 }
 /**
 * Tells whether wylie is the THDL Extended Wylie for an adornment.
 * An adornment is something that is part of a stack but is not a
 * consonant, such as a Tibetan or Sanskrit vowel or a bindu.  The
 * same string may therefore be both an adornment and a vowel, or both
 * an adornment and punctuation.
 *
 * @param wylie the Wylie string to be checked
 * @return true if and only if wylie is a member of
 * <code>vowelSet</code> or equals one of the M-bearing strings listed
 * in the method body
 */
 public static boolean isWylieAdornment(String wylie) {
     if (vowelSet.contains(wylie)) {
         return true;
     }
     final String[] binduForms = {
         "M",   /* U+0F7E */
         "M^",  /* U+0F83 */
         "iM",
         "-iM",
         "eM",
         "aiM",
         "oM",
         "auM"
     };
     for (int i = 0; i < binduForms.length; i++) {
         if (wylie.equals(binduForms[i])) {
             return true;
         }
     }
     return false;
 }
 /**
 * Tells whether wylie is the THDL Extended Wylie for an adornment
 * (see {@link #isWylieAdornment(String)}) that contains a vowel
 * within it.  Every adornment qualifies except the two bare forms
 * "M" and "M^".
 *
 * @param wylie the Wylie string to be checked
 * @return true if and only if wylie is an adornment other than "M"
 * and "M^"
 */
 public static boolean isWylieAdornmentAndContainsVowel(String wylie) {
     if (!isWylieAdornment(wylie)) {
         return false;
     }
     final boolean vowellessBindu
         = wylie.equals("M") /* U+0F7E */
           || wylie.equals("M^") /* U+0F83 */;
     return !vowellessBindu;
 }
 /**
 * Returns true iff this Wylie is valid as a leftmost character in a
 * Tibetan syllable.  For example, in the syllable 'brgyad', 'b' is the
@ -839,9 +983,9 @@ public static boolean isWylieLeft(String s) {
 }
 /**
-* Returns true iff this Wylie is valid as a right (post-vowel)
+* Returns true iff this Wylie is valid as a suffix (i.e., a right
-* character in a Tibetan syllable.  For example, in the syllable
+* (post-vowel) character) in a Tibetan syllable.  For example, in the
-* 'lags', 'g' is in the right character position. Valid right
+* syllable 'lags', 'g' is in the right character position. Valid right
 * characters include g, ng, d, n, b, m, r, l, s, ', and T.
 * @param s the (Wylie) string to be checked
 * @return true if s is a possible right character in a Tibetan
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@ -7,22 +7,27 @@
 //   - <?x?> marks a command
 //   - the commands are:
 //   Consonants - set of consonants in tibetan
 //   Numbers - set of numbers in tibetan
 //   Vowels - set of vowels
-//   Other - other characters: numbers, punctuation, etc.
+//   Other - other characters: punctuation, etc.
 //   Input - those codes which serve basis for wylie input method
-//	subtypes: Input:Punctuation, Input:Vowels, Input:Tibetan, Input:Sanskrit
+//	subtypes: Input:Punctuation, Input:Vowels, Input:Tibetan,
 //            Input:Numbers, Input:Sanskrit
 //   ToWylie - codes only needed for duff to wylie conversion, including vowels
 //   Ignore - ignore until another command is reached
 <?Consonants?>
-k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N,Sh,v,f,Dz,0,1,2,3,4,5,6,7,8,9,>0,>1,>2,>3,>4,>5,>6,>7,>8,>9,<0,<1,<2,<3,<4,<5,<6,<7,<8,<9
+k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N,Sh,v,f,Dz
 <?Numbers?>
 0,1,2,3,4,5,6,7,8,9,>0,>1,>2,>3,>4,>5,>6,>7,>8,>9,<0,<1,<2,<3,<4,<5,<6,<7,<8,<9
 <?Vowels?>
 a,i,u,e,o,I,U,ai,au,A,-i,-I
 <?Other?>
-_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],<,>,{,},*
+_, ,/,|,!,:,;,@,#,$,%,(,),H,M,`,&,@#,?,=,[,],{,},*
-// FIXME: add these etc.: M^,~,~^
+// FIXME: add these etc.: M^,~,~^,<,> (< and > cause ka<7 to quit working)
 <?Input:Punctuation?>
 //_~32,1~0,32
@ -691,6 +696,8 @@ a+y~143,4~~8,63~1,109~8,120~1,123~1,125~8,106~8,113~f68,fb1
 a+r~144,4~~8,64~1,109~8,120~1,123~1,125~8,106~8,113~f68,fb2
 a+r+y~145,4~~8,65~1,109~8,121~1,123~1,125~8,107~8,114~f68,fb2,fb1
 <?Input:Numbers?>
 //numbers
 0~190,1~~10,48~~~~~~~0F20
 1~191,1~~10,49~~~~~~~0F21