Now {Pm} is treated like {PAm}; {Pm:} is like {PAm:}; {P:} is like {PA:}.

This commit is contained in:
dchandler 2003-11-30 02:06:48 +00:00
parent e7c4cc1874
commit ac412c994b
5 changed files with 56 additions and 19 deletions

View file

@ -328,7 +328,7 @@ public class ACIPConverter {
hasErrors = true;
uni = err;
}
if (null != writer) writer.write(uni);
writer.write(uni);
}
if (null != tdoc) {
String wylie

View file

@ -83,6 +83,9 @@ public class ACIPRules {
// Keep this code in sync with getWylieForACIPVowel.
}
// {Pm} is treated just like {PAm}; {P:} is treated just
// like {PA:}; {Pm:} is treated just like {PAm:}. But
// that happens thanks to
// TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
}
return (acipVowels.contains(s));
}
@ -276,6 +279,10 @@ public class ACIPRules {
putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
}
// {Pm} is treated just like {PAm}; {P:} is treated just
// like {PA:}; {Pm:} is treated just like {PAm:}. But
// that happens thanks to
// TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
}
return (String)acipVowel2wylie.get(acip);
}
@ -475,12 +482,13 @@ public class ACIPRules {
superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
// :m does not appear, though you'd think it's as valid as m:.
// I doubt these will occur alone:
superACIP2unicode.put("m", "\u0F7E");
superACIP2unicode.put(":", "\u0F7F");
superACIP2unicode.put("m:", "\u0F7E\u0F7F");
superACIP2unicode.put("Am", "\u0F7E");
superACIP2unicode.put("A:", "\u0F7F");
superACIP2unicode.put("Am:", "\u0F7E\u0F7F");
superACIP2unicode.put("0", "\u0F20");
superACIP2unicode.put("1", "\u0F21");
@ -567,12 +575,11 @@ public class ACIPRules {
if (vowel.indexOf('m') >= 0) {
DuffCode last = (DuffCode)duff.get(duff.size() - 1);
duff.remove(duff.size() - 1);
duff.remove(duff.size() - 1); // getBindu will add it back...
TibTextUtils.getBindu(duff, last);
}
if (vowel.indexOf(':') >= 0)
duff.add(TibetanMachineWeb.getGlyph("H"));
}
/** Returns true if and only if l is the ACIP representation of a

View file

@ -656,13 +656,13 @@ tstHelper("KA'", "[(K . A), (' . )]",
"{S+P+YO}{M+S}",
"{S+P+YO}{M}{S}",
});
tstHelper(":'AO", "[(: . -), (' . ), (A . O)]");
tstHelper("m'AO", "[(m . -), (' . ), (A . O)]");
tstHelper("m:'AO", "[(m . -), (: . -), (' . ), (A . O)]");
tstHelper(":'AO", "[( . A:), (' . ), (A . O)]");
tstHelper("m'AO", "[( . Am), (' . ), (A . O)]");
tstHelper("m:'AO", "[( . Am:), (' . ), (A . O)]");
tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" });
tstHelper("KE:", "[(K . E:)]");
tstHelper("K:", "[(K . ), (: . )]",
new String[] { /* No parses exist. "K:" is illegal. */ });
tstHelper("K:", "[(K . A:)]",
new String[] { "{KA:}" });
tstHelper("'AO", "[(' . ), (A . O)]");
tstHelper("'AOM", "[(' . ), (A . O), (M . )]");
@ -717,8 +717,8 @@ tstHelper("KA'", "[(K . A), (' . )]",
tstHelper("TAA", "[(T . ), (A . A)]");
tstHelper("DAA", "[(D . ), (A . A)]");
tstHelper("DAAm", "[(D . ), (A . Am)]");
tstHelper("DAAm:", "[(D . ), (A . Am:)]");
tstHelper("DAAm", "[(D . A), (A . Am)]");
tstHelper("DAAm:", "[(D . A), (A . Am:)]");
tstHelper("DA'im:", "[(D . A), (' . im:)]");
tstHelper("NA+YA", "[(N . +), (Y . A)]");
@ -7196,6 +7196,8 @@ tstHelper("ZUR");
/** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
public void testScanner() {
shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");
shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]");
shelp("LA...SGRUB",
@ -7416,6 +7418,7 @@ G+NA
MNA'
M+NA
*/
uhelp("B+NA", "\u0f56\u0fa3");
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
@ -7438,9 +7441,10 @@ M+NA
uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP : because : is not an ACIP consonant]");
uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP m because m is not an ACIP consonant]");
uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");
uhelp("N+YA", "\u0f53\u0fb1");
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
@ -7503,6 +7507,17 @@ M+NA
uhelp("WWA", "\u0f5d\u0fba");
uhelp("W+WA", "\u0f5d\u0fba");
tstHelper("Km:", "{KAm:}",
new String[] { "{KAm:}" },
new String[] { },
"{KAm:}");
uhelp("Km:", "\u0f40\u0f7e\u0f7f");
uhelp("KAm:", "\u0f40\u0f7e\u0f7f");
uhelp("Km", "\u0f40\u0f7e");
uhelp("KAm", "\u0f40\u0f7e");
uhelp("K:", "\u0f40\u0f7f");
uhelp("KA:", "\u0f40\u0f7f");
uhelp("/NY'EE/", "\u0f3C\u0f49\u0F71\u0F7B\u0f3D");
uhelp("*#HUm: G+DHOO GRO`;.,",
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");

View file

@ -213,12 +213,14 @@ class TPair {
void getUnicode(StringBuffer sb, boolean subscribed) {
if (null != getLeft()) {
String x = ACIPRules.getUnicodeFor(getLeft(), subscribed);
if (null != x) sb.append(x);
if (null == x) throw new Error("TPair: " + getLeft() + " has no Uni");
sb.append(x);
}
if (null != getRight()
&& !("-".equals(getRight()) || "A".equals(getRight()))) {
&& !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) {
String x = ACIPRules.getUnicodeFor(getRight(), subscribed);
if (null != x) sb.append(x);
if (null == x) throw new Error("TPair: " + getRight() + " has no Uni");
sb.append(x);
}
}

View file

@ -95,7 +95,7 @@ class TPairListFactory {
* 'ANG" circumstances
* @param weHaveSeenVowelAlready true if and only if, in our
* recursion, we've already found one vowel (not a disambiguator,
* but a vowel like "A", "E", "Um:", "'U", etc.) */
* but a vowel like "A", "E", "Um:", "m", "'U", etc.) */
private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) {
// base case for our recursion:
@ -212,7 +212,11 @@ class TPairListFactory {
}
for (i = Math.min(ACIPRules.MAX_VOWEL_LENGTH, xl - ll); i >= 1; i--) {
String t = null;
if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))) {
if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))
// Or one of these bare forms, which we massage into "Am",
// "Am:", and "A:" (they are special-cased here because,
// originally, I didn't think {Pm} should be treated like {PAm}):
|| "m".equals(t) || "m:".equals(t) || ":".equals(t)) {
r = t;
break;
}
@ -227,6 +231,14 @@ class TPairListFactory {
return new TPair(l, "+");
}
// Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:.
int mod = 0;
if ("m".equals(r)) { r = "Am"; mod = -1; }
if (":".equals(r)) { r = "A:"; mod = -1; }
if ("m:".equals(r)) { r = "Am:"; mod = -1; }
if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though...
// what if we see a character that's not part of any vowel or
// consonant? We return it.
if (null == l && null == r) {
@ -236,7 +248,8 @@ class TPairListFactory {
}
howMuch[0] = (((l == null) ? 0 : l.length())
+ ((r == null) ? 0 : r.length()));
+ ((r == null) ? 0 : r.length())
+ mod);
return new TPair(l, r);
}
}