Now converts Tibetan into the Wylie pa'am, not pa'm.

This works with or without vowels in the part preceding the 'am or 'ang, overcoming the inconsistency that I'd introduced here for a short time.
2003-04-08 04:56:40 +00:00 · 2003-04-08 04:56:40 +00:00 · 7dd67bbf6a
commit 7dd67bbf6a
parent eb71fb6075
2 changed files with 63 additions and 72 deletions
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -85,22 +85,26 @@ public class TibTextUtils implements THDLWylieConstants {
 	}
 /**
-* Figures out how to arrange a list of characters into glyphs. For example, if the user types 'bsgr'
+* Figures out how to arrange a list of characters into glyphs. For
-* using the Extended Wylie keyboard, this method figures out that this should be represented
+* example, if the user types 'bsgr' using the Extended Wylie keyboard,
-* as a 'b' glyph followed by a 's-g-r' glyph. If you know that the characters do not
+* this method figures out that this should be represented as a 'b'
-* contain Sanskrit stacks, or do not contain Tibetan stacks, then you can specify this
+* glyph followed by a 's-g-r' glyph. If you know that the characters
-* to speed the process up. Otherwise, the method will first check to see if the characters
+* do not contain Sanskrit stacks, or do not contain Tibetan stacks,
-* correspond to any Tibetan stacks, and if not, then it will check for Sanskrit stacks.
+* then you can specify this to speed the process up. Otherwise, the
-* @param chars the list of Tibetan characters you want to find glyphs for
+* method will first check to see if the characters correspond to any
-* @param areStacksOnRight whether stacking should try to maximize from right to left (true)
+* Tibetan stacks, and if not, then it will check for Sanskrit stacks.
-* or from left to right (false). In the Extended Wylie keyboard, you try to stack from
+* @param chars the list of Tibetan characters you want to find glyphs
-* right to left. Thus, the character sequence r-g-r would be stacked as r followed by gr,
+* for
-* rather than rg followed by r. In the Sambhota and TCC keyboards, the stack direction
+* @param areStacksOnRight whether stacking should try to maximize from
-* is reversed.
+* right to left (true) or from left to right (false). In the Extended
-* @param definitelyTibetan should be true if the characters are known to be Tibetan and
+* Wylie keyboard, you try to stack from right to left. Thus, the
-* not Sanskrit
+* character sequence r-g-r would be stacked as r followed by gr,
-* @param definitelySanskrit should be true if the characters are known to be Sanskrit and
+* rather than rg followed by r. In the Sambhota and TCC keyboards, the
-* not Tibetan
+* stack direction is reversed.
 * @param definitelyTibetan should be true if the characters are known
 * to be Tibetan and not Sanskrit
 * @param definitelySanskrit should be true if the characters are known
 * to be Sanskrit and not Tibetan
 */
 	public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
 		StringBuffer tibBuffer, sanBuffer;
@@ -233,10 +237,10 @@ public class TibTextUtils implements THDLWylieConstants {
 	}
 /**
-* Finds the first meaningful element to occur within a string of Extended Wylie.
+* Finds the first meaningful element to occur within a string of
-* This could be a character, a vowel,
+* Extended Wylie.  This could be a character, a vowel, punctuation, or
-* punctuation, or formatting. For example, passed the string 'tshapo',
+* formatting. For example, passed the string 'tshapo', this method
-* this method will return 'tsh'.
+* will return 'tsh'.
 * @param wylie the String of wylie you want to scan
 * @return the next meaningful subpart of this string, or null if
 * no meaningful subpart can be found (for example 'x' has no equivalent
@@ -867,6 +871,7 @@ public class TibTextUtils implements THDLWylieConstants {
                    // prepend:
                    tailEndWylie.insert(0,
                                        ACHUNG
                                        + aVowelToUseAfter(ACHUNG)
                                        + TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
                    effectiveSize -= 2;
                }
@@ -938,27 +943,34 @@ public class TibTextUtils implements THDLWylieConstants {
                                              * 9 words doesn't have
                                              * any ending with d --
                                              * all end with s. */) {
-                        /* Yes, this is ambiguous. How do we handle it?  See this from Andres:
+                        /* Yes, this is ambiguous. How do we handle
-
+                         * it?  See this from Andres:
-                        I'm posting this upon David Chandler's request. According to Lobsang
+                         *
-                        Thonden in Modern Tibetan Grammar Language (page 42), with regards to
+                         * I'm posting this upon David Chandler's
-                        identifying the root letter in 3 lettered words there are only 23
+                         * request. According to Lobsang Thonden in
-                        ambiguous cases. He writes:
+                         * Modern Tibetan Grammar Language (page 42),
-
+                         * with regards to identifying the root letter
-                        If the last letter is 'sa' and the first two letters are affixes, then
+                         * in 3 lettered words there are only 23
-                        the SECOND ONE is the root letter in the following 9 WORDS ONLY:
+                         * ambiguous cases. He writes:
-
+                         *
-                        gdas gnas gsas dgas dmas bdas mdas 'gas 'das
+                         * If the last letter is 'sa' and the first
-
+                         * two letters are affixes, then the SECOND
-                        And the FIRST is the root letter in the following 14 WORDS ONLY:
+                         * ONE is the root letter in the following 9
-
+                         * WORDS ONLY:
-                        rags lags nags bags bangs gangs rangs langs nangs sangs 
+                         *
-                        babs rabs rams nams
+                         * gdas gnas gsas dgas dmas bdas mdas 'gas
-
+                         * 'das
-                        As I mentioned before, I think that the best solution for now is to
+                         *
-                        hard-wire these cases. Even if the list is not exhaustive, at least
+                         * And the FIRST is the root letter in the
-                        we'll have most cases covered.
+                         * following 14 WORDS ONLY:
-
+                         *
                         * rags lags nags bags bangs gangs rangs langs
                         * nangs sangs babs rabs rams nams
                         *
                         * As I mentioned before, I think that the
                         * best solution for now is to hard-wire these
                         * cases. Even if the list is not exhaustive,
                         * at least we'll have most cases covered.
                         */
                        /* FIXME: these constants are hard-wired here,
@@ -978,36 +990,6 @@ public class TibTextUtils implements THDLWylieConstants {
                                                                   wylie2));
                        }
                        // DLC FIXME: what about ambiguity between
                        // wa-zur and wa? dwa vs. d.wa, e.g.?
                        // DLC FIXME: disambiguators are needed for
                        // this case too, as b.lag vs. blag
                        // illustrates.  Use something based on this,
                        // from LegalTshegBar.java:
                        //
                        //             boolean disambiguatorNeeded = false;
                        //             char prefix = getPrefix();
                        //             sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
                        //             if (!hasHeadLetter()) {
                        //                 if (EWC_ya == rootLetter) {
                        //                     if (isConsonantThatTakesYaBtags(prefix))
                        //                         disambiguatorNeeded = true;
                        //                 } else if (EWC_ra == rootLetter) {
                        //                     if (isConsonantThatTakesRaBtags(prefix))
                        //                         disambiguatorNeeded = true;
                        //                 } else if (EWC_la == rootLetter) {
                        //                     if (isConsonantThatTakesLaBtags(prefix))
                        //                         disambiguatorNeeded = true;
                        //                 } else if (EWC_wa == rootLetter) {
                        //                     if (isConsonantThatTakesWaZur(prefix))
                        //                         disambiguatorNeeded = true;
                        //                 }
                        //             }
                        //             if (disambiguatorNeeded)
                        //                 sb.append(WYLIE_DISAMBIGUATING_KEY);
                    } else {
                        /* no ambiguity. the "a" vowel comes after
                         * wylie1. */
@@ -1076,6 +1058,11 @@ public class TibTextUtils implements THDLWylieConstants {
                    && currWylie.equals(ACHEN)))
                sb.append(WYLIE_DISAMBIGUATING_KEY);
            /* le'ang, not le'ng, to be consistent w.r.t. pa'am
             * vs. pa'm: */
            if (lastWylie.equals(ACHUNG))
                sb.append(WYLIE_aVOWEL);
 			sb.append(currWylie);
 			lastWylie = currWylie;
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@@ -628,6 +628,10 @@ public static boolean isVowel(String s) {
 * transliteration, false if not
 */
 public static boolean isAmbiguousWylie(String x, String y) {
    // What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
    // Doesn't matter, because that's illegal.  wa doesn't take any
    // prefixes.
 	return (("g".equals(x) && "y".equals(y))
            || ("b".equals(x) && "l".equals(y))
            || ("b".equals(x) && "r".equals(y)));