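ACIP {DAN,\nLHAG} (a comma followed by a line break) used to be treated like {DAN, LHAG}, but that behavior had regressed; this commit restores it.
Added tests for the lexer's handling of ACIP spaces, commas (shads), and line breaks.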
This commit is contained in:
parent
8e673bbc2c
commit
581643cf59
2 changed files with 19 additions and 4 deletions
|
@ -111,16 +111,19 @@ public class ACIPTshegBarScanner {
|
||||||
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
|
/** Helper. Here because ACIP {MTHAR%\nKHA} should be treated the
|
||||||
same w.r.t. tsheg insertion regardless of the lex errors and
|
same w.r.t. tsheg insertion regardless of the lex errors and
|
||||||
lex warnings found. */
|
lex warnings found. */
|
||||||
private static boolean lastNonExceptionalThingWasNonPunctish(ArrayList al) {
|
private static boolean lastNonExceptionalThingWasAdornmentOr(ArrayList al, int kind) {
|
||||||
int i = al.size() - 1;
|
int i = al.size() - 1;
|
||||||
while (i >= 0 && (((TString)al.get(i)).getType() == TString.WARNING
|
while (i >= 0 && (((TString)al.get(i)).getType() == TString.WARNING
|
||||||
|| ((TString)al.get(i)).getType() == TString.ERROR))
|
|| ((TString)al.get(i)).getType() == TString.ERROR))
|
||||||
--i;
|
--i;
|
||||||
return (i >= 0 && // FIXME: or maybe i < 0 || ...
|
return (i >= 0 && // FIXME: or maybe i < 0 || ...
|
||||||
(((TString)al.get(i)).getType() == TString.TIBETAN_NON_PUNCTUATION
|
(((TString)al.get(i)).getType() == kind
|
||||||
|| ((TString)al.get(i)).getType() == TString.TSHEG_BAR_ADORNMENT));
|
|| ((TString)al.get(i)).getType() == TString.TSHEG_BAR_ADORNMENT));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DLC FIXME "H:\n\n" becomes "H: \n\n", wrongly I think. See
|
||||||
|
// Tibetan! 5.1 section on formatting Tibetan texts.
|
||||||
|
|
||||||
/** Returns a list of {@link TString TStrings} corresponding
|
/** Returns a list of {@link TString TStrings} corresponding
|
||||||
* to s, possibly the empty list (when the empty string is the
|
* to s, possibly the empty list (when the empty string is the
|
||||||
* input). Each String is either a Latin comment, some Latin
|
* input). Each String is either a Latin comment, some Latin
|
||||||
|
@ -808,7 +811,7 @@ public class ACIPTshegBarScanner {
|
||||||
if (('\r' == ch
|
if (('\r' == ch
|
||||||
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
||||||
&& !al.isEmpty()
|
&& !al.isEmpty()
|
||||||
&& lastNonExceptionalThingWasNonPunctish(al)) {
|
&& lastNonExceptionalThingWasAdornmentOr(al, TString.TIBETAN_NON_PUNCTUATION)) {
|
||||||
al.add(new TString(" ", TString.TIBETAN_PUNCTUATION));
|
al.add(new TString(" ", TString.TIBETAN_PUNCTUATION));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -816,7 +819,7 @@ public class ACIPTshegBarScanner {
|
||||||
if (('\r' == ch
|
if (('\r' == ch
|
||||||
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
|| ('\n' == ch && i > 0 && s.charAt(i - 1) != '\r'))
|
||||||
&& !al.isEmpty()
|
&& !al.isEmpty()
|
||||||
&& lastNonExceptionalThingWasNonPunctish(al)
|
&& lastNonExceptionalThingWasAdornmentOr(al, TString.TIBETAN_PUNCTUATION)
|
||||||
&& ((TString)al.get(al.size() - 1)).getText().equals(",")
|
&& ((TString)al.get(al.size() - 1)).getText().equals(",")
|
||||||
&& s.charAt(i-1) == ','
|
&& s.charAt(i-1) == ','
|
||||||
&& (i + (('\r' == ch) ? 2 : 1) < sl
|
&& (i + (('\r' == ch) ? 2 : 1) < sl
|
||||||
|
|
|
@ -7526,6 +7526,18 @@ M+NA
|
||||||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||||
uhelp("*#HUm: K+DHA GRO`;.,",
|
uhelp("*#HUm: K+DHA GRO`;.,",
|
||||||
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f40\u0fa2\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
"\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f40\u0fa2\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d");
|
||||||
|
uhelp("HA,\nHA\n\nHA", "\u0f67\u0f0d \u0f67\u0f0b\n\n\u0f67");
|
||||||
|
uhelp("NGA,", "\u0f44\u0f0c\u0f0d");
|
||||||
|
uhelp("NGA,\nHA\n\nHA", "\u0f44\u0f0c\u0f0d \u0f67\u0f0b\n\n\u0f67");
|
||||||
|
uhelp("MDO,\n\nKA\n\nHA", "\u0f58\u0f51\u0f7c\u0f0d\n\n\u0f40\u0f0b\n\n\u0f67");
|
||||||
|
uhelp("GA ,HA", "\u0f42 \u0f0d\u0f67");
|
||||||
|
uhelp("GA ,HA", "\u0f42 \u0f0d\u0f67");
|
||||||
|
uhelp("GU ,HA", "\u0f42\u0f74\u0f0b\u0f0d\u0f67");
|
||||||
|
uhelp("MA ,HA", "\u0f58\u0f0b\u0f0d\u0f67");
|
||||||
|
uhelp("GA HA", "\u0f42\u0f0b\u0f67");
|
||||||
|
uhelp("GU, ,KHO", "\u0f42\u0f74\u0f0d \u0f0d\u0f41\u0f7c");
|
||||||
|
uhelp("GU  ,KHO", "\u0f42\u0f74\u0f0b \u0f0d\u0f41\u0f7c"); // FIXME: missing a shad after GU, warn about that.
|
||||||
|
uhelp("GA  HA", "\u0f42\u0f0b \u0f67");
|
||||||
}
|
}
|
||||||
public void testFixedFormSubjoinedConsonants() {
|
public void testFixedFormSubjoinedConsonants() {
|
||||||
// Usual subjoined RA:
|
// Usual subjoined RA:
|
||||||
|
|
Loading…
Reference in a new issue