Tibetan-to-Wylie conversion now produces pa'am, not pa'm. This works with or without vowels
in the part preceding the 'am or 'ang, fixing the inconsistency that I had briefly introduced here.
This commit is contained in:
parent
eb71fb6075
commit
7dd67bbf6a
2 changed files with 63 additions and 72 deletions
|
@ -85,22 +85,26 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Figures out how to arrange a list of characters into glyphs. For example, if the user types 'bsgr'
|
* Figures out how to arrange a list of characters into glyphs. For
|
||||||
* using the Extended Wylie keyboard, this method figures out that this should be represented
|
* example, if the user types 'bsgr' using the Extended Wylie keyboard,
|
||||||
* as a 'b' glyph followed by a 's-g-r' glyph. If you know that the characters do not
|
* this method figures out that this should be represented as a 'b'
|
||||||
* contain Sanskrit stacks, or do not contain Tibetan stacks, then you can specify this
|
* glyph followed by a 's-g-r' glyph. If you know that the characters
|
||||||
* to speed the process up. Otherwise, the method will first check to see if the characters
|
* do not contain Sanskrit stacks, or do not contain Tibetan stacks,
|
||||||
* correspond to any Tibetan stacks, and if not, then it will check for Sanskrit stacks.
|
* then you can specify this to speed the process up. Otherwise, the
|
||||||
* @param chars the list of Tibetan characters you want to find glyphs for
|
* method will first check to see if the characters correspond to any
|
||||||
* @param areStacksOnRight whether stacking should try to maximize from right to left (true)
|
* Tibetan stacks, and if not, then it will check for Sanskrit stacks.
|
||||||
* or from left to right (false). In the Extended Wylie keyboard, you try to stack from
|
* @param chars the list of Tibetan characters you want to find glyphs
|
||||||
* right to left. Thus, the character sequence r-g-r would be stacked as r followed by gr,
|
* for
|
||||||
* rather than rg followed by r. In the Sambhota and TCC keyboards, the stack direction
|
* @param areStacksOnRight whether stacking should try to maximize from
|
||||||
* is reversed.
|
* right to left (true) or from left to right (false). In the Extended
|
||||||
* @param definitelyTibetan should be true if the characters are known to be Tibetan and
|
* Wylie keyboard, you try to stack from right to left. Thus, the
|
||||||
* not Sanskrit
|
* character sequence r-g-r would be stacked as r followed by gr,
|
||||||
* @param definitelySanskrit should be true if the characters are known to be Sanskrit and
|
* rather than rg followed by r. In the Sambhota and TCC keyboards, the
|
||||||
* not Tibetan
|
* stack direction is reversed.
|
||||||
|
* @param definitelyTibetan should be true if the characters are known
|
||||||
|
* to be Tibetan and not Sanskrit
|
||||||
|
* @param definitelySanskrit should be true if the characters are known
|
||||||
|
* to be Sanskrit and not Tibetan
|
||||||
*/
|
*/
|
||||||
public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
|
public static List getGlyphs(List chars, boolean areStacksOnRight, boolean definitelyTibetan, boolean definitelySanskrit) {
|
||||||
StringBuffer tibBuffer, sanBuffer;
|
StringBuffer tibBuffer, sanBuffer;
|
||||||
|
@ -233,10 +237,10 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Finds the first meaningful element to occur within a string of Extended Wylie.
|
* Finds the first meaningful element to occur within a string of
|
||||||
* This could be a character, a vowel,
|
* Extended Wylie. This could be a character, a vowel, punctuation, or
|
||||||
* punctuation, or formatting. For example, passed the string 'tshapo',
|
* formatting. For example, passed the string 'tshapo', this method
|
||||||
* this method will return 'tsh'.
|
* will return 'tsh'.
|
||||||
* @param wylie the String of wylie you want to scan
|
* @param wylie the String of wylie you want to scan
|
||||||
* @return the next meaningful subpart of this string, or null if
|
* @return the next meaningful subpart of this string, or null if
|
||||||
* no meaningful subpart can be found (for example 'x' has no equivalent
|
* no meaningful subpart can be found (for example 'x' has no equivalent
|
||||||
|
@ -867,6 +871,7 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
// prepend:
|
// prepend:
|
||||||
tailEndWylie.insert(0,
|
tailEndWylie.insert(0,
|
||||||
ACHUNG
|
ACHUNG
|
||||||
|
+ aVowelToUseAfter(ACHUNG)
|
||||||
+ TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
|
+ TibetanMachineWeb.getWylieForGlyph((DuffCode)glyphList.get(effectiveSize + 1)));
|
||||||
effectiveSize -= 2;
|
effectiveSize -= 2;
|
||||||
}
|
}
|
||||||
|
@ -938,27 +943,34 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
* 9 words doesn't have
|
* 9 words doesn't have
|
||||||
* any ending with d --
|
* any ending with d --
|
||||||
* all end with s. */) {
|
* all end with s. */) {
|
||||||
/* Yes, this is ambiguous. How do we handle it? See this from Andres:
|
/* Yes, this is ambiguous. How do we handle
|
||||||
|
* it? See this from Andres:
|
||||||
I'm posting this upon David Chandler's request. According to Lobsang
|
*
|
||||||
Thonden in Modern Tibetan Grammar Language (page 42), with regards to
|
* I'm posting this upon David Chandler's
|
||||||
identifying the root letter in 3 lettered words there are only 23
|
* request. According to Lobsang Thonden in
|
||||||
ambiguous cases. He writes:
|
* Modern Tibetan Grammar Language (page 42),
|
||||||
|
* with regards to identifying the root letter
|
||||||
If the last letter is 'sa' and the first two letters are affixes, then
|
* in 3 lettered words there are only 23
|
||||||
the SECOND ONE is the root letter in the following 9 WORDS ONLY:
|
* ambiguous cases. He writes:
|
||||||
|
*
|
||||||
gdas gnas gsas dgas dmas bdas mdas 'gas 'das
|
* If the last letter is 'sa' and the first
|
||||||
|
* two letters are affixes, then the SECOND
|
||||||
And the FIRST is the root letter in the following 14 WORDS ONLY:
|
* ONE is the root letter in the following 9
|
||||||
|
* WORDS ONLY:
|
||||||
rags lags nags bags bangs gangs rangs langs nangs sangs
|
*
|
||||||
babs rabs rams nams
|
* gdas gnas gsas dgas dmas bdas mdas 'gas
|
||||||
|
* 'das
|
||||||
As I mentioned before, I think that the best solution for now is to
|
*
|
||||||
hard-wire these cases. Even if the list is not exhaustive, at least
|
* And the FIRST is the root letter in the
|
||||||
we'll have most cases covered.
|
* following 14 WORDS ONLY:
|
||||||
|
*
|
||||||
|
* rags lags nags bags bangs gangs rangs langs
|
||||||
|
* nangs sangs babs rabs rams nams
|
||||||
|
*
|
||||||
|
* As I mentioned before, I think that the
|
||||||
|
* best solution for now is to hard-wire these
|
||||||
|
* cases. Even if the list is not exhaustive,
|
||||||
|
* at least we'll have most cases covered.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/* FIXME: these constants are hard-wired here,
|
/* FIXME: these constants are hard-wired here,
|
||||||
|
@ -978,36 +990,6 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
wylie2));
|
wylie2));
|
||||||
}
|
}
|
||||||
|
|
||||||
// DLC FIXME: what about ambiguity between
|
|
||||||
// wa-zur and wa? dwa vs. d.wa, e.g.?
|
|
||||||
|
|
||||||
// DLC FIXME: disambiguators are needed for
|
|
||||||
// this case too, as b.lag vs. blag
|
|
||||||
// illustrates. Use something based on this,
|
|
||||||
// from LegalTshegBar.java:
|
|
||||||
//
|
|
||||||
// boolean disambiguatorNeeded = false;
|
|
||||||
// char prefix = getPrefix();
|
|
||||||
// sb.append(UnicodeCodepointToThdlWylie.getThdlWylieForUnicodeCodepoint(prefix));
|
|
||||||
// if (!hasHeadLetter()) {
|
|
||||||
// if (EWC_ya == rootLetter) {
|
|
||||||
// if (isConsonantThatTakesYaBtags(prefix))
|
|
||||||
// disambiguatorNeeded = true;
|
|
||||||
// } else if (EWC_ra == rootLetter) {
|
|
||||||
// if (isConsonantThatTakesRaBtags(prefix))
|
|
||||||
// disambiguatorNeeded = true;
|
|
||||||
// } else if (EWC_la == rootLetter) {
|
|
||||||
// if (isConsonantThatTakesLaBtags(prefix))
|
|
||||||
// disambiguatorNeeded = true;
|
|
||||||
// } else if (EWC_wa == rootLetter) {
|
|
||||||
// if (isConsonantThatTakesWaZur(prefix))
|
|
||||||
// disambiguatorNeeded = true;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if (disambiguatorNeeded)
|
|
||||||
// sb.append(WYLIE_DISAMBIGUATING_KEY);
|
|
||||||
|
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
/* no ambiguity. the "a" vowel comes after
|
/* no ambiguity. the "a" vowel comes after
|
||||||
* wylie1. */
|
* wylie1. */
|
||||||
|
@ -1076,6 +1058,11 @@ public class TibTextUtils implements THDLWylieConstants {
|
||||||
&& currWylie.equals(ACHEN)))
|
&& currWylie.equals(ACHEN)))
|
||||||
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
sb.append(WYLIE_DISAMBIGUATING_KEY);
|
||||||
|
|
||||||
|
/* le'ang, not le'ng, to be consistent w.r.t. pa'am
|
||||||
|
* vs. pa'm: */
|
||||||
|
if (lastWylie.equals(ACHUNG))
|
||||||
|
sb.append(WYLIE_aVOWEL);
|
||||||
|
|
||||||
sb.append(currWylie);
|
sb.append(currWylie);
|
||||||
|
|
||||||
lastWylie = currWylie;
|
lastWylie = currWylie;
|
||||||
|
|
|
@ -628,6 +628,10 @@ public static boolean isVowel(String s) {
|
||||||
* transliteration, false if not
|
* transliteration, false if not
|
||||||
*/
|
*/
|
||||||
public static boolean isAmbiguousWylie(String x, String y) {
|
public static boolean isAmbiguousWylie(String x, String y) {
|
||||||
|
// What about ambiguity between wa-zur and wa? dwa vs. d.wa, e.g.?
|
||||||
|
// Doesn't matter, because that's illegal. wa doesn't take any
|
||||||
|
// prefixes.
|
||||||
|
|
||||||
return (("g".equals(x) && "y".equals(y))
|
return (("g".equals(x) && "y".equals(y))
|
||||||
|| ("b".equals(x) && "l".equals(y))
|
|| ("b".equals(x) && "l".equals(y))
|
||||||
|| ("b".equals(x) && "r".equals(y)));
|
|| ("b".equals(x) && "r".equals(y)));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue