Now turns Tibetan into the Wylie pa'am, not pa'm.

Works with or without vowels in the part preceding the 'am or 'ang, overcoming the inconsistency that I'd put here for a short time.
2003-04-08 04:56:40 +00:00 · 2003-04-08 04:56:40 +00:00 · 7dd67bbf6a
commit 7dd67bbf6a
parent eb71fb6075
2 changed files with 63 additions and 72 deletions
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@ -85,22 +85,26 @@ public class TibTextUtils implements THDLWylieConstants {
 	}

 /**
-* Figures out how to arrange a list of characters into glyphs. For example, if the user types 'bsgr'
-* using the Extended Wylie keyboard, this method figures out that this should be represented
-* as a 'b' glyph followed by a 's-g-r' glyph. If you know that the characters do not
-* contain Sanskrit stacks, or do not contain Tibetan stacks, then you can specify this
-* to speed the process up. Otherwise, the method will first check to see if the characters
-* correspond to any Tibetan stacks, and if not, then it will check for Sanskrit stacks.
-* @param chars the list of Tibetan characters you want to find glyphs for
-* @param areStacksOnRight whether stacking should try to maximize from right to left (true)
-* or from left to right (false). In the Extended Wylie keyboard, you try to stack from
-* right to left. Thus, the character sequence r-g-r would be stacked as r followed by gr,
-* rather than rg followed by r. In the Sambhota and TCC keyboards, the stack direction
-* is reversed.
-* @param definitelyTibetan should be true if the characters are known to be Tibetan and
-* not Sanskrit
-* @param definitelySanskrit should be true if the characters are known to be Sanskrit and
-* not Tibetan
+* Figures out how to arrange a list of characters into glyphs. For
+* example, if the user types 'bsgr' using the Extended Wylie keyboard,
+* this method figures out that this should be represented as a 'b'
+* glyph followed by a 's-g-r' glyph. If you know that the characters
+* do not contain Sanskrit stacks, or do not contain Tibetan stacks,
+* then you can specify this to speed the process up. Otherwise, the
+* method will first check to see if the characters correspond to any
+* Tibetan stacks, and if not, then it will check for Sanskrit stacks.
+* @param chars the list of Tibetan characters you want to find glyphs
+* for
+* @param areStacksOnRight whether stacking should try to maximize from
+* right to left (true) or from left to right (false). In the Extended
+* Wylie keyboard, you try to stack from right to left. Thus, the
+* character sequence r-g-r would be stacked as r followed by gr,
+* rather than rg followed by r. In the Sambhota and TCC keyboards, the
+* stack direction is reversed.
+* @param definitelyTibetan should be true if the characters are known
+* to be Tibetan and not Sanskrit
+* @param definitelySanskrit should be true if the characters are known
+* to be Sanskrit and not Tibetan
 */
 	public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
 		StringBuffer tibBuffer, sanBuffer;
@ -233,10 +237,10 @@ public class TibTextUtils implements THDLWylieConstants {
 	}

 /**
-* Finds the first meaningful element to occur within a string of Extended Wylie.
-* This could be a character, a vowel,
-* punctuation, or formatting. For example, passed the string 'tshapo',
-* this method will return 'tsh'.
+* Finds the first meaningful element to occur within a string of
+* Extended Wylie.  This could be a character, a vowel, punctuation, or
+* formatting. For example, passed the string 'tshapo', this method
+* will return 'tsh'.
 * @param wylie the String of wylie you want to scan
 * @return the next meaningful subpart of this string, or null if
 * no meaningful subpart can be found (for example 'x' has no equivalent
@ -867,6 +871,7 @@ public class TibTextUtils implements THDLWylieConstants {
                    // prepend:
                    tailEndWylie.insert(0,
                                        ACHUNG
+                                        + aVowelToUseAfter(ACHUNG)
                                        + TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
                    effectiveSize -= 2;
                }
@ -938,27 +943,34 @@ public class TibTextUtils implements THDLWylieConstants {
                                              * 9 words doesn't have
                                              * any ending with d --
                                              * all end with s. */) {
-                        /* Yes, this is ambiguous. How do we handle it?  See this from Andres:
-
-                        I'm posting this upon David Chandler's request. According to Lobsang
-                        Thonden in Modern Tibetan Grammar Language (page 42), with regards to
-                        identifying the root letter in 3 lettered words there are only 23
-                        ambiguous cases. He writes:
-
-                        If the last letter is 'sa' and the first two letters are affixes, then
-                        the SECOND ONE is the root letter in the following 9 WORDS ONLY:
-
-                        gdas gnas gsas dgas dmas bdas mdas 'gas 'das
-
-                        And the FIRST is the root letter in the following 14 WORDS ONLY:
-
-                        rags lags nags bags bangs gangs rangs langs nangs sangs 
-                        babs rabs rams nams
-
-                        As I mentioned before, I think that the best solution for now is to
-                        hard-wire these cases. Even if the list is not exhaustive, at least
-                        we'll have most cases covered.
-
+                        /* Yes, this is ambiguous. How do we handle
+                         * it?  See this from Andres:
+                         *
+                         * I'm posting this upon David Chandler's
+                         * request. According to Lobsang Thonden in
+                         * Modern Tibetan Grammar Language (page 42),
+                         * with regards to identifying the root letter
+                         * in 3 lettered words there are only 23
+                         * ambiguous cases. He writes:
+                         *
+                         * If the last letter is 'sa' and the first
+                         * two letters are affixes, then the SECOND
+                         * ONE is the root letter in the following 9
+                         * WORDS ONLY:
+                         *
+                         * gdas gnas gsas dgas dmas bdas mdas 'gas
+                         * 'das
+                         *
+                         * And the FIRST is the root letter in the
+                         * following 14 WORDS ONLY:
+                         *
+                         * rags lags nags bags bangs gangs rangs langs
+                         * nangs sangs babs rabs rams nams
+                         *
+                         * As I mentioned before, I think that the
+                         * best solution for now is to hard-wire these
+                         * cases. Even if the list is not exhaustive,
+                         * at least we'll have most cases covered.
                         */

                        /* FIXME: these constants are hard-wired here,
@ -978,36 +990,6 @@ public class TibTextUtils implements THDLWylieConstants {
                                                                   wylie2));
                        }

-                        // DLC FIXME: what about ambiguity between
-                        // wa-zur and wa? dwa vs. d.wa, e.g.?
-
-                        // DLC FIXME: disambiguators are needed for
-                        // this case too, as b.lag vs. blag
-                        // illustrates.  Use something based on this,
-                        // from LegalTshegBar.java:
-                        //
-                        //             boolean disambiguatorNeeded = false;
-                        //             char prefix = getPrefix();
-                        //             sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
-                        //             if (!hasHeadLetter()) {
-                        //                 if (EWC_ya == rootLetter) {
-                        //                     if (isConsonantThatTakesYaBtags(prefix))
-                        //                         disambiguatorNeeded = true;
-                        //                 } else if (EWC_ra == rootLetter) {
-                        //                     if (isConsonantThatTakesRaBtags(prefix))
-                        //                         disambiguatorNeeded = true;
-                        //                 } else if (EWC_la == rootLetter) {
-                        //                     if (isConsonantThatTakesLaBtags(prefix))
-                        //                         disambiguatorNeeded = true;
-                        //                 } else if (EWC_wa == rootLetter) {
-                        //                     if (isConsonantThatTakesWaZur(prefix))
-                        //                         disambiguatorNeeded = true;
-                        //                 }
-                        //             }
-                        //             if (disambiguatorNeeded)
-                        //                 sb.append(WYLIE_DISAMBIGUATING_KEY);
-
-                            
                    } else {
                        /* no ambiguity. the "a" vowel comes after
                         * wylie1. */
@ -1076,6 +1058,11 @@ public class TibTextUtils implements THDLWylieConstants {
                    && currWylie.equals(ACHEN)))
                sb.append(WYLIE_DISAMBIGUATING_KEY);

+            /* le'ang, not le'ng, to be consistent w.r.t. pa'am
+             * vs. pa'm: */
+            if (lastWylie.equals(ACHUNG))
+                sb.append(WYLIE_aVOWEL);
+
 			sb.append(currWylie);

 			lastWylie = currWylie;
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -628,6 +628,10 @@ public static boolean isVowel(String s) {
 * transliteration, false if not
 */
 public static boolean isAmbiguousWylie(String x, String y) {
+    // What about ambiguity between wa-zur and wa, e.g. dwa vs. d.wa?
+    // It doesn't matter, because d.wa is illegal: wa doesn't take any
+    // prefixes.
+
 	return (("g".equals(x) && "y".equals(y))
            || ("b".equals(x) && "l".equals(y))
            || ("b".equals(x) && "r".equals(y)));