TMW->Wylie conversion now takes advantage of prefix rules, i.e., the rules that say which root letters can take which prefixes ("ya can take a ga prefix", etc.). The ACIP->Unicode converter now gives warnings (optionally, and by default, inline). The converter also produces output even when lexical errors occur; in that case the errors and warnings appear inline in the output.
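To illustrate what "prefix rules" means here: a rule answers whether a given root letter may take a given prefix letter. The sketch below is a conceptual illustration only, not code from this commit; the table is deliberately tiny and partial, and the converter's real rule set and API are not shown on this page.

import java.util.*;

// Conceptual sketch only -- NOT the converter's actual rule table or API.
// It just shows the kind of question a prefix rule answers.
final class PrefixRulesSketch {
    // root letter -> prefix letters it can take (tiny, partial, illustrative)
    private static final Map<String, Set<String>> CAN_TAKE = new HashMap<>();
    static {
        CAN_TAKE.put("ya", new HashSet<>(Arrays.asList("g")));                // "ya can take a ga prefix" (e.g. g.yag)
        CAN_TAKE.put("ga", new HashSet<>(Arrays.asList("d", "b", "m", "'"))); // but GA does not take a GA prefix
    }

    static boolean takesPrefix(String root, String prefix) {
        Set<String> prefixes = CAN_TAKE.get(root);
        return prefixes != null && prefixes.contains(prefix);
    }

    public static void main(String[] args) {
        System.out.println(takesPrefix("ya", "g")); // true
        System.out.println(takesPrefix("ga", "g")); // false -- cf. the GGYES comment in the diff below
    }
}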
parent 21ef657921
commit d5ad760230
14 changed files with 678 additions and 270 deletions
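The diff below switches the test helper uhelp() from the two-argument ACIPConverter.convertToUnicode(acip, errors) to a four-argument form. Here is a minimal sketch of calling the new form the same way the tests do; the import path is assumed, and the meaning of the trailing null/true arguments is not stated on this page, so they are simply passed verbatim as in uhelp().

// Sketch of driving the ACIP->Unicode converter the way the updated tests do.
import org.thdl.tib.text.ttt.ACIPConverter;  // assumed package path

public class AcipToUnicodeDemo {
    public static void main(String[] args) throws Exception { // hedge: the method may declare checked exceptions
        StringBuffer errors = new StringBuffer();
        // null and true are passed exactly as the updated uhelp() passes them.
        String unicode = ACIPConverter.convertToUnicode("BSKYABS GRO", errors, null, true);
        // Per the commit message, output is produced even when lexical errors occur,
        // with errors and warnings reported inline / collected in `errors`.
        System.out.println("Unicode: " + unicode);
        System.out.println("Messages: " + errors);
    }
}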
@@ -128,7 +128,7 @@ public class PackageTest extends TestCase {
 }
 {
-    TStackListList legalParses = pt.getUniqueParse();
+    TStackListList legalParses = pt.getUniqueParse(false);
     boolean goodness2 = (expectedLegalParses == null
                          || expectedLegalParses.length == legalParses.size());
     for (int i = 0 ; i < legalParses.size(); i++) {
@@ -139,18 +139,21 @@ public class PackageTest extends TestCase {
         || expectedLegalParses.length < i+1
         || n.equals(expectedLegalParses[i]));
     if (!okay || !goodness2)
-        System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is " + expectedLegalParses[i]);
+        System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is "
+                           + ((i < expectedLegalParses.length)
+                              ? expectedLegalParses[i]
+                              : "not present"));
     assertTrue(okay);
 }
 if (!goodness2)
-    System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses.");
+    System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses for ACIP " + acip + ".");
 assertTrue(goodness2);
 TStackListList allLegalParses = pt.getLegalParses();
 TStackListList decentParses = pt.getNonIllegalParses();
 if (pt.getBestParse() == null) {
     if (legalParses.size() == 0) {
         if (null != expectedBestParse && !"".equals(expectedBestParse)) {
-            System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
+            System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for ACIP {" + acip + "}");
             assertTrue(false);
         }
         System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
@@ -163,7 +166,7 @@ public class PackageTest extends TestCase {
     }
 } else {
     if (legalParses.size() > 1) {
-        System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
+        System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for ACIP " + acip + ": " + legalParses);
         assertTrue(legalParses.size() == 2
                    && (legalParses.get(0).size()
                        == 1 + legalParses.get(1).size()));
@@ -176,7 +179,7 @@ public class PackageTest extends TestCase {
 if (null != expectedBestParse) {
     boolean good = pt.getBestParse().equals(expectedBestParse);
     if (!good) {
-        System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
+        System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for ACIP {" + acip + "}");
     }
     assertTrue(good);
 }
@@ -229,6 +232,116 @@ public class PackageTest extends TestCase {
  * {@link TPairList#getACIPError()}, and {@link
  * TPairList#recoverACIP()}. */
 public void testBreakACIPIntoChunks() {
+    tstHelper("GASN"); // ambiguous with regard to prefix rules
+    tstHelper("BARMA"); // ambiguous with regard to prefix rules
+    tstHelper("MARDA"); // ambiguous with regard to prefix rules
+    tstHelper("BBA"); // ambiguous with regard to prefix rules
+    tstHelper("BBLUGS"); // ambiguous with regard to prefix rules
+    tstHelper("BDRA"); // ambiguous with regard to prefix rules
+    tstHelper("BDRAG"); // ambiguous with regard to prefix rules
+    tstHelper("BDRA'I"); // ambiguous with regard to prefix rules
+    tstHelper("BDRAL"); // ambiguous with regard to prefix rules
+    tstHelper("BDRAN"); // ambiguous with regard to prefix rules
+    tstHelper("BDRANGS"); // ambiguous with regard to prefix rules
+    tstHelper("BDREN"); // ambiguous with regard to prefix rules
+    tstHelper("BDRI"); // ambiguous with regard to prefix rules
+    tstHelper("BDRIS"); // ambiguous with regard to prefix rules
+    tstHelper("BDROL"); // ambiguous with regard to prefix rules
+    tstHelper("BDRUG"); // ambiguous with regard to prefix rules
+    tstHelper("BLCAG"); // ambiguous with regard to prefix rules
+    tstHelper("BLCI"); // ambiguous with regard to prefix rules
+    tstHelper("BLKONG"); // ambiguous with regard to prefix rules
+    tstHelper("BLNGA"); // ambiguous with regard to prefix rules
+    tstHelper("BLNGAG"); // ambiguous with regard to prefix rules
+    tstHelper("BMA"); // ambiguous with regard to prefix rules
+    tstHelper("BMYOD"); // ambiguous with regard to prefix rules
+    tstHelper("BSALDA"); // ambiguous with regard to prefix rules
+    tstHelper("BSAMS"); // ambiguous with regard to prefix rules
+    tstHelper("BSEMS"); // ambiguous with regard to prefix rules
+    tstHelper("BTSAMS"); // ambiguous with regard to prefix rules
+    tstHelper("BTSIMS"); // ambiguous with regard to prefix rules
+    tstHelper("DDANG"); // ambiguous with regard to prefix rules
+    tstHelper("DDAR"); // ambiguous with regard to prefix rules
+    tstHelper("DDRANGS"); // ambiguous with regard to prefix rules
+    tstHelper("DDRUG"); // ambiguous with regard to prefix rules
+    tstHelper("DNAG"); // ambiguous with regard to prefix rules
+    tstHelper("DNOGS"); // ambiguous with regard to prefix rules
+    tstHelper("DRBAN"); // ambiguous with regard to prefix rules
+    tstHelper("DRGYU"); // ambiguous with regard to prefix rules
+    tstHelper("DRTOG"); // ambiguous with regard to prefix rules
+    tstHelper("DYA"); // ambiguous with regard to prefix rules
+    tstHelper("DYAN"); // ambiguous with regard to prefix rules
+    tstHelper("GDRA"); // ambiguous with regard to prefix rules
+    tstHelper("GDRIM"); // ambiguous with regard to prefix rules
+    tstHelper("GGAN"); // ambiguous with regard to prefix rules
+    tstHelper("GGYUR"); // ambiguous with regard to prefix rules
+    tstHelper("GLTAR"); // ambiguous with regard to prefix rules
+    tstHelper("GLTUNG"); // ambiguous with regard to prefix rules
+    tstHelper("GMA"); // ambiguous with regard to prefix rules
+    tstHelper("GMAN"); // ambiguous with regard to prefix rules
+    tstHelper("GMON"); // ambiguous with regard to prefix rules
+    tstHelper("GRDEGS"); // ambiguous with regard to prefix rules
+    tstHelper("GRDZU"); // ambiguous with regard to prefix rules
+    tstHelper("GRGYA"); // ambiguous with regard to prefix rules
+    tstHelper("GRNAGS"); // ambiguous with regard to prefix rules
+    tstHelper("GRTAN"); // ambiguous with regard to prefix rules
+    tstHelper("GRTOGS"); // ambiguous with regard to prefix rules
+    tstHelper("GRTZO"); // ambiguous with regard to prefix rules
+    tstHelper("GRTZOD"); // ambiguous with regard to prefix rules
+    tstHelper("GRTZON"); // ambiguous with regard to prefix rules
+    tstHelper("GSLA"); // ambiguous with regard to prefix rules
+    tstHelper("GSNAD"); // ambiguous with regard to prefix rules
+    tstHelper("GZLA"); // ambiguous with regard to prefix rules
+    tstHelper("MBA"); // ambiguous with regard to prefix rules
+    tstHelper("MBA'"); // ambiguous with regard to prefix rules
+    tstHelper("MBI'I"); // ambiguous with regard to prefix rules
+    tstHelper("MHA'A"); // ambiguous with regard to prefix rules
+    tstHelper("MRDA"); // ambiguous with regard to prefix rules
+    tstHelper("MRDO"); // ambiguous with regard to prefix rules
+    tstHelper("MRDZOGS"); // ambiguous with regard to prefix rules
+    tstHelper("MRGA"); // ambiguous with regard to prefix rules
+    tstHelper("MRGAD"); // ambiguous with regard to prefix rules
+    tstHelper("MRGAN"); // ambiguous with regard to prefix rules
+    tstHelper("MRJES"); // ambiguous with regard to prefix rules
+    tstHelper("MRJOD"); // ambiguous with regard to prefix rules
+    tstHelper("MRTOGS"); // ambiguous with regard to prefix rules
+    tstHelper("MRTOL"); // ambiguous with regard to prefix rules
+    tstHelper("MRTZE'I"); // ambiguous with regard to prefix rules
+    tstHelper("MRTZIGS"); // ambiguous with regard to prefix rules
+    tstHelper("MSAM"); // ambiguous with regard to prefix rules
+    tstHelper("MSGRIB"); // ambiguous with regard to prefix rules
+    tstHelper("MSKYES"); // ambiguous with regard to prefix rules
+    tstHelper("MSON"); // ambiguous with regard to prefix rules
+    tstHelper("MSOS"); // ambiguous with regard to prefix rules
+    tstHelper("MSTAMS"); // ambiguous with regard to prefix rules
+    tstHelper("MSTAN"); // ambiguous with regard to prefix rules
+
+    // If you're not careful, you'll think GGYES is a legal
+    // Tibetan tsheg bar and parse it as {G}{G+YE}{S}. But it's
+    // Sanskrit, really, because GA doesn't take a GA prefix.
+    // This doesn't occur in ACIP input files that I've seen, but
+    // GGYI (S1000I.INC) and GGYUR (S5275MC4.ACT) do occur.
+    tstHelper("GGYES", "{G}{G}{YE}{S}",
+              new String[] { "{G}{G}{YE}{S}", "{G}{G+YE}{S}", "{G+G}{YE}{S}" },
+              new String[] { },
+              "{G+G}{YE}{S}");
+
+    tstHelper("DRUG", "{D}{RU}{G}",
+              new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
+              new String[] { "{D+RU}{G}" },
+              "{D+RU}{G}");
+
+    tstHelper("d+H+d+HA", "{d+}{H+}{d+}{HA}",
+              new String[] { "{d+H+d+HA}" },
+              new String[] { "{d+H+d+HA}" });
+
+    tstHelper("Gd+H+d+HA");
+
     tstHelper("AUTPA", "{AU}{T}{PA}",
               new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
               new String[] { },
@@ -249,7 +362,8 @@ public class PackageTest extends TestCase {
               new String[] { "{G+R+VA}{'I}" });
     tstHelper("G-RVA'I", "{G-}{R}{VA}{'I}",
               new String[] { "{G}{R+VA}{'I}" },
-              new String[] { "{G}{R+VA}{'I}" });
+              new String[] { },
+              "{G}{R+VA}{'I}");
     tstHelper("RVA", "{R}{VA}",
               new String[] { "{R+VA}" },
               new String[] { "{R+VA}" });
@@ -6967,8 +7081,8 @@ tstHelper("ZUR");
           "",
           "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); // DLC FIXME
     shelp("PAS... LA",
-          "Offset 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
-          "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
+          "Offset 5 or maybe 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
+          "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
     shelp("PAS... LA",
           "",
           true,
@@ -6983,28 +7097,28 @@ tstHelper("ZUR");
     shelp("", "", "[]");
     shelp("[DD]", "");
     shelp("[",
-          "Offset 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
+          "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
     shelp("{",
-          "Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
+          "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
     shelp("DD", "");
     shelp("DD]",
-          "Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+          "Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");

-    shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
+    shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
     shelp("/NYA/", "");
     shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
     shelp("[LS][# A [[[[[COMMENT][LS]",
-          "Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
-          + "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
-          + "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
-          + "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
-          + "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
+          "Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+          + "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+          + "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+          + "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
+          + "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
     shelp("[ILLEGAL COMMENT]",
-          "Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+          "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
     shelp("(BSKYABS GRO)", ""); // DLC WHAT ARE THESE FOR?
-    shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
+    shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n");
     shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
-    shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
+    shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n");
     shelp("(BA)(PA)NYA(CA)", "");
     shelp("NYAx", "");
     shelp("NYA x", "");
@@ -7033,9 +7147,9 @@ tstHelper("ZUR");
     shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
     shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n");
     shelp("?", "", "[QUESTION:{?}]");
-    shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
+    shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n");
     shelp("[* Correction with []]",
-          "Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+          "Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");

     // DLC FIXME: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. Autocorrect to the latter.
@@ -7051,8 +7165,8 @@ tstHelper("ZUR");
     uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
     }
     shelp("K\\,",
-          "Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
-          "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{\\}, TIBETAN_PUNCTUATION:{,}]");
+          "Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
+          "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");

     shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR%}]");
@@ -7073,15 +7187,15 @@ tstHelper("ZUR");
     shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
     shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
     shelp("@19-20A",
-          "Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
-          "[ERROR:{@}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur.
+          "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
+          "[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur.
     shelp("@[7B]", "");
     shelp("@012A.3KA",
           "",
           "[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
     shelp("@012A.34",
-          "Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
-          "[ERROR:{@012A.}, TIBETAN_NON_PUNCTUATION:{34}]");
+          "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
+          "[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
     shelp("@[07B]", "");
     shelp("@[00007B]", "");
     shelp("@7B", "");
@@ -7097,8 +7211,8 @@ tstHelper("ZUR");
     shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT
     shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
     shelp("//NYA\\\\",
-          "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
-          "[START_SLASH:{/}, ERROR:{//}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{\\}, ERROR:{\\}]");
+          "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
+          "[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}]");

     }
     private static void uhelp(String acip) {
@@ -7106,7 +7220,7 @@ tstHelper("ZUR");
     }
     private static void uhelp(String acip, String expectedUnicode) {
         StringBuffer errors = new StringBuffer();
-        String unicode = ACIPConverter.convertToUnicode(acip, errors);
+        String unicode = ACIPConverter.convertToUnicode(acip, errors, null, true);
         if (null == unicode) {
             if (null != expectedUnicode && "none" != expectedUnicode) {
                 System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
@@ -8729,22 +8843,22 @@ tstHelper("shKA");
     }
     /* DLC FIXME: add test cases: from R0021F.ACE: ambiguous Tibetan/Sanskrit:

-    BDA' þþþþ
-    B+DA þþþ
-    DBANG þþþ
-    D+BA þþþ
-    DGA' þþþþ
-    D+GA þþþ
-    DGRA þþþ
-    D+GRA þþþ
-    DGYESþþþþþ
-    D+GYA þþþ
-    DMAR þþþþ
-    D+MA þþþ
-    GDA' þþþþ
-    G+DA þþþ
-    GNAD þþþþ
-    G+NA þþþ
-    MNA' þþþþ
-    M+NA þþþ
+    BDA'
+    B+DA
+    DBANG
+    D+BA
+    DGA'
+    D+GA
+    DGRA
+    D+GRA
+    DGYES
+    D+GYA
+    DMAR
+    D+MA
+    GDA'
+    G+DA
+    GNAD
+    G+NA
+    MNA'
+    M+NA
     */