Now {Pm} is treated like {PAm}; {Pm:} is like {PAm:}; {P:} is like {PA:}.
This commit is contained in:
parent
e7c4cc1874
commit
ac412c994b
5 changed files with 56 additions and 19 deletions
|
@ -328,7 +328,7 @@ public class ACIPConverter {
|
||||||
hasErrors = true;
|
hasErrors = true;
|
||||||
uni = err;
|
uni = err;
|
||||||
}
|
}
|
||||||
if (null != writer) writer.write(uni);
|
writer.write(uni);
|
||||||
}
|
}
|
||||||
if (null != tdoc) {
|
if (null != tdoc) {
|
||||||
String wylie
|
String wylie
|
||||||
|
|
|
@ -83,6 +83,9 @@ public class ACIPRules {
|
||||||
|
|
||||||
// Keep this code in sync with getWylieForACIPVowel.
|
// Keep this code in sync with getWylieForACIPVowel.
|
||||||
}
|
}
|
||||||
|
// {Pm} is treated just like {PAm}; {P:} is treated just
|
||||||
|
// like {PA:}; {Pm:} is treated just like {PAm:}. But
|
||||||
|
// that happens thanks to TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
|
||||||
}
|
}
|
||||||
return (acipVowels.contains(s));
|
return (acipVowels.contains(s));
|
||||||
}
|
}
|
||||||
|
@ -276,6 +279,10 @@ public class ACIPRules {
|
||||||
putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
|
putMapping(acipVowel2wylie, baseVowels[i][0] + "m:", baseVowels[i][1] + "MH");
|
||||||
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
|
putMapping(acipVowel2wylie, '\'' + baseVowels[i][0] + "m:", baseVowels[i][2] + "MH");
|
||||||
}
|
}
|
||||||
|
// {Pm} is treated just like {PAm}; {P:} is treated just
|
||||||
|
// like {PA:}; {Pm:} is treated just like {PAm:}. But
|
||||||
|
// that happens thanks to
|
||||||
|
// TPairListFactory.getFirstConsonantAndVowel(StringBuffer,int[]).
|
||||||
}
|
}
|
||||||
return (String)acipVowel2wylie.get(acip);
|
return (String)acipVowel2wylie.get(acip);
|
||||||
}
|
}
|
||||||
|
@ -475,12 +482,13 @@ public class ACIPRules {
|
||||||
superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
|
superACIP2unicode.put("'im:", "\u0F71\u0F80\u0F7E\u0F7F");
|
||||||
// :m does not appear, though you'd think it's as valid as m:.
|
// :m does not appear, though you'd think it's as valid as m:.
|
||||||
|
|
||||||
// I doubt these will occur alone:
|
|
||||||
superACIP2unicode.put("m", "\u0F7E");
|
superACIP2unicode.put("m", "\u0F7E");
|
||||||
superACIP2unicode.put(":", "\u0F7F");
|
superACIP2unicode.put(":", "\u0F7F");
|
||||||
|
superACIP2unicode.put("m:", "\u0F7E\u0F7F");
|
||||||
|
|
||||||
superACIP2unicode.put("Am", "\u0F7E");
|
superACIP2unicode.put("Am", "\u0F7E");
|
||||||
superACIP2unicode.put("A:", "\u0F7F");
|
superACIP2unicode.put("A:", "\u0F7F");
|
||||||
|
superACIP2unicode.put("Am:", "\u0F7E\u0F7F");
|
||||||
|
|
||||||
superACIP2unicode.put("0", "\u0F20");
|
superACIP2unicode.put("0", "\u0F20");
|
||||||
superACIP2unicode.put("1", "\u0F21");
|
superACIP2unicode.put("1", "\u0F21");
|
||||||
|
@ -567,12 +575,11 @@ public class ACIPRules {
|
||||||
|
|
||||||
if (vowel.indexOf('m') >= 0) {
|
if (vowel.indexOf('m') >= 0) {
|
||||||
DuffCode last = (DuffCode)duff.get(duff.size() - 1);
|
DuffCode last = (DuffCode)duff.get(duff.size() - 1);
|
||||||
duff.remove(duff.size() - 1);
|
duff.remove(duff.size() - 1); // getBindu will add it back...
|
||||||
TibTextUtils.getBindu(duff, last);
|
TibTextUtils.getBindu(duff, last);
|
||||||
}
|
}
|
||||||
if (vowel.indexOf(':') >= 0)
|
if (vowel.indexOf(':') >= 0)
|
||||||
duff.add(TibetanMachineWeb.getGlyph("H"));
|
duff.add(TibetanMachineWeb.getGlyph("H"));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns true if and only if l is the ACIP representation of a
|
/** Returns true if and only if l is the ACIP representation of a
|
||||||
|
|
|
@ -656,13 +656,13 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
||||||
"{S+P+YO}{M+S}",
|
"{S+P+YO}{M+S}",
|
||||||
"{S+P+YO}{M}{S}",
|
"{S+P+YO}{M}{S}",
|
||||||
});
|
});
|
||||||
tstHelper(":'AO", "[(: . -), (' . ), (A . O)]");
|
tstHelper(":'AO", "[( . A:), (' . ), (A . O)]");
|
||||||
tstHelper("m'AO", "[(m . -), (' . ), (A . O)]");
|
tstHelper("m'AO", "[( . Am), (' . ), (A . O)]");
|
||||||
tstHelper("m:'AO", "[(m . -), (: . -), (' . ), (A . O)]");
|
tstHelper("m:'AO", "[( . Am:), (' . ), (A . O)]");
|
||||||
tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" });
|
tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" });
|
||||||
tstHelper("KE:", "[(K . E:)]");
|
tstHelper("KE:", "[(K . E:)]");
|
||||||
tstHelper("K:", "[(K . ), (: . )]",
|
tstHelper("K:", "[(K . A:)]",
|
||||||
new String[] { /* No parses exist. "K:" is illegal. */ });
|
new String[] { "{KA:}" });
|
||||||
tstHelper("'AO", "[(' . ), (A . O)]");
|
tstHelper("'AO", "[(' . ), (A . O)]");
|
||||||
tstHelper("'AOM", "[(' . ), (A . O), (M . )]");
|
tstHelper("'AOM", "[(' . ), (A . O), (M . )]");
|
||||||
|
|
||||||
|
@ -717,8 +717,8 @@ tstHelper("KA'", "[(K . A), (' . )]",
|
||||||
|
|
||||||
tstHelper("TAA", "[(T . ), (A . A)]");
|
tstHelper("TAA", "[(T . ), (A . A)]");
|
||||||
tstHelper("DAA", "[(D . ), (A . A)]");
|
tstHelper("DAA", "[(D . ), (A . A)]");
|
||||||
tstHelper("DAAm", "[(D . ), (A . Am)]");
|
tstHelper("DAAm", "[(D . A), (A . Am)]");
|
||||||
tstHelper("DAAm:", "[(D . ), (A . Am:)]");
|
tstHelper("DAAm:", "[(D . A), (A . Am:)]");
|
||||||
tstHelper("DA'im:", "[(D . A), (' . im:)]");
|
tstHelper("DA'im:", "[(D . A), (' . im:)]");
|
||||||
|
|
||||||
tstHelper("NA+YA", "[(N . +), (Y . A)]");
|
tstHelper("NA+YA", "[(N . +), (Y . A)]");
|
||||||
|
@ -7196,6 +7196,8 @@ tstHelper("ZUR");
|
||||||
|
|
||||||
/** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
|
/** Tests {@link ACIPTshegBarScanner#scan(String, StringBuffer, int)}. */
|
||||||
public void testScanner() {
|
public void testScanner() {
|
||||||
|
shelp("Pm KA", "", "[TIBETAN_NON_PUNCTUATION:{Pm}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{KA}]");
|
||||||
|
|
||||||
shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]");
|
shelp("KA (KHA\nGA)", "", "[TIBETAN_NON_PUNCTUATION:{KA}, TIBETAN_PUNCTUATION:{ }, START_PAREN:{(}, TIBETAN_NON_PUNCTUATION:{KHA}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{GA}, END_PAREN:{)}]");
|
||||||
|
|
||||||
shelp("LA...SGRUB",
|
shelp("LA...SGRUB",
|
||||||
|
@ -7416,6 +7418,7 @@ G+NA
|
||||||
MNA'
|
MNA'
|
||||||
M+NA
|
M+NA
|
||||||
*/
|
*/
|
||||||
|
uhelp("B+NA", "\u0f56\u0fa3");
|
||||||
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
|
uhelp("BNA", "[#WARNING CONVERTING ACIP DOCUMENT: Warning: We're going with {B+NA}, but only because our knowledge of prefix rules says that {B}{NA} is not a legal Tibetan tsheg bar (\"syllable\")]\u0f56\u0fa3");
|
||||||
uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
|
uhelp("^GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
|
||||||
uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
|
uhelp("^ GONG SA", "\u0f38\u0f42\u0f7c\u0f44\u0f0b\u0f66");
|
||||||
|
@ -7438,9 +7441,10 @@ M+NA
|
||||||
uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
|
uhelp("x", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP x must be glued to the end of a tsheg bar, but this one was not]");
|
||||||
uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
|
uhelp("o", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP o must be glued to the end of a tsheg bar, but this one was not]");
|
||||||
uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
|
uhelp("%", "[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: The ACIP % must be glued to the end of a tsheg bar, but this one was not][#WARNING CONVERTING ACIP DOCUMENT: Lexical warning: The ACIP {%} is treated by this converter as U+0F35, but sometimes might represent U+0F14 in practice]");
|
||||||
uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP : because : is not an ACIP consonant]");
|
uhelp(":", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") : HAS THESE ERRORS: Cannot convert ACIP A: because A: is a \"vowel\" without an associated consonant]");
|
||||||
uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP m because m is not an ACIP consonant]");
|
uhelp("m", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") m HAS THESE ERRORS: Cannot convert ACIP Am because Am is a \"vowel\" without an associated consonant]");
|
||||||
|
|
||||||
|
uhelp("N+YA", "\u0f53\u0fb1");
|
||||||
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
|
uhelp("NA+YA", "\u0f53\u0fb1"); // FIXME: warn about the extra A
|
||||||
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
|
uhelp("NE+YA", "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") NE+YA HAS THESE ERRORS: Cannot convert ACIP NE+-YA because + is not an ACIP consonant]");
|
||||||
uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
|
uhelp("tRAStA", "\u0f4a\u0fb2\u0f66\u0f9a");
|
||||||
|
@ -7503,6 +7507,17 @@ M+NA
|
||||||
uhelp("WWA", "\u0f5d\u0fba");
|
uhelp("WWA", "\u0f5d\u0fba");
|
||||||
uhelp("W+WA", "\u0f5d\u0fba");
|
uhelp("W+WA", "\u0f5d\u0fba");
|
||||||
|
|
||||||
|
tstHelper("Km:", "{KAm:}",
|
||||||
|
new String[] { "{KAm:}" },
|
||||||
|
new String[] { },
|
||||||
|
"{KAm:}");
|
||||||
|
uhelp("Km:", "\u0f40\u0f7e\u0f7f");
|
||||||
|
uhelp("KAm:", "\u0f40\u0f7e\u0f7f");
|
||||||
|
uhelp("Km", "\u0f40\u0f7e");
|
||||||
|
uhelp("KAm", "\u0f40\u0f7e");
|
||||||
|
uhelp("K:", "\u0f40\u0f7f");
|
||||||
|
uhelp("KA:", "\u0f40\u0f7f");
|
||||||
|
|
||||||
uhelp("/NY'EE/", "\u0f3C\u0f49\u0F71\u0F7B\u0f3D");
|
uhelp("/NY'EE/", "\u0f3C\u0f49\u0F71\u0F7B\u0f3D");
|
||||||
uhelp("*#HUm: G+DHOO GRO`;.,",
|
uhelp("*#HUm: G+DHOO GRO`;.,",
|
||||||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||||
|
|
|
@ -213,12 +213,14 @@ class TPair {
|
||||||
void getUnicode(StringBuffer sb, boolean subscribed) {
|
void getUnicode(StringBuffer sb, boolean subscribed) {
|
||||||
if (null != getLeft()) {
|
if (null != getLeft()) {
|
||||||
String x = ACIPRules.getUnicodeFor(getLeft(), subscribed);
|
String x = ACIPRules.getUnicodeFor(getLeft(), subscribed);
|
||||||
if (null != x) sb.append(x);
|
if (null == x) throw new Error("TPair: " + getLeft() + " has no Uni");
|
||||||
|
sb.append(x);
|
||||||
}
|
}
|
||||||
if (null != getRight()
|
if (null != getRight()
|
||||||
&& !("-".equals(getRight()) || "A".equals(getRight()))) {
|
&& !("-".equals(getRight()) || "+".equals(getRight()) || "A".equals(getRight()))) {
|
||||||
String x = ACIPRules.getUnicodeFor(getRight(), subscribed);
|
String x = ACIPRules.getUnicodeFor(getRight(), subscribed);
|
||||||
if (null != x) sb.append(x);
|
if (null == x) throw new Error("TPair: " + getRight() + " has no Uni");
|
||||||
|
sb.append(x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -95,7 +95,7 @@ class TPairListFactory {
|
||||||
* 'ANG" circumstances
|
* 'ANG" circumstances
|
||||||
* @param weHaveSeenVowelAlready true if and only if, in our
|
* @param weHaveSeenVowelAlready true if and only if, in our
|
||||||
* recursion, we've already found one vowel (not a disambiguator,
|
* recursion, we've already found one vowel (not a disambiguator,
|
||||||
* but a vowel like "A", "E", "Um:", "'U", etc.) */
|
* but a vowel like "A", "E", "Um:", "m", "'U", etc.) */
|
||||||
private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) {
|
private static TPairList breakHelper(String acip, boolean tickIsVowel, boolean weHaveSeenVowelAlready) {
|
||||||
|
|
||||||
// base case for our recursion:
|
// base case for our recursion:
|
||||||
|
@ -212,7 +212,11 @@ class TPairListFactory {
|
||||||
}
|
}
|
||||||
for (i = Math.min(ACIPRules.MAX_VOWEL_LENGTH, xl - ll); i >= 1; i--) {
|
for (i = Math.min(ACIPRules.MAX_VOWEL_LENGTH, xl - ll); i >= 1; i--) {
|
||||||
String t = null;
|
String t = null;
|
||||||
if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))) {
|
if (ACIPRules.isVowel(t = acip.substring(ll, ll + i))
|
||||||
|
// Also accept the bare forms "m", "m:", and ":", which are
|
||||||
|
// massaged below into "Am", "Am:", and "A:". They are special-cased
|
||||||
|
// here because {Pm} was not originally meant to be treated like {PAm}:
|
||||||
|
|| "m".equals(t) || "m:".equals(t) || ":".equals(t)) {
|
||||||
r = t;
|
r = t;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -227,6 +231,14 @@ class TPairListFactory {
|
||||||
return new TPair(l, "+");
|
return new TPair(l, "+");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Allow Pm to mean PAm, P: to mean PA:, Pm: to mean PAm:.
|
||||||
|
int mod = 0;
|
||||||
|
if ("m".equals(r)) { r = "Am"; mod = -1; }
|
||||||
|
if (":".equals(r)) { r = "A:"; mod = -1; }
|
||||||
|
if ("m:".equals(r)) { r = "Am:"; mod = -1; }
|
||||||
|
if (":m".equals(r)) { r = "A:m"; mod = -1; } // not seen, though...
|
||||||
|
|
||||||
|
|
||||||
// what if we see a character that's not part of any vowel or
|
// what if we see a character that's not part of any vowel or
|
||||||
// consonant? We return it.
|
// consonant? We return it.
|
||||||
if (null == l && null == r) {
|
if (null == l && null == r) {
|
||||||
|
@ -236,7 +248,8 @@ class TPairListFactory {
|
||||||
}
|
}
|
||||||
|
|
||||||
howMuch[0] = (((l == null) ? 0 : l.length())
|
howMuch[0] = (((l == null) ? 0 : l.length())
|
||||||
+ ((r == null) ? 0 : r.length()));
|
+ ((r == null) ? 0 : r.length())
|
||||||
|
+ mod);
|
||||||
return new TPair(l, r);
|
return new TPair(l, r);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue