The ACIP->Tibetan converter now has perfect low-level functionality,

and it has the capability to produce error messages and warnings that
make sense to the user.  One can now get the correct parse, if one
exists, for an ACIP tsheg bar.

One could even feed in ACIP and get a list of warnings about things as
innocuous as PADMA, which a dumb converter would have trouble with.
One could then turn ACIP into well-behaved ACIP for that dumb
converter, if one really wanted to.

Still to do:

o Scan ACIP files into tsheg bars.
o Produce TMW/Latin (from which you can get Unicode, etc.).
o E-mail the illegal tsheg bars to the ACIP fellows so they can fix
  the affected documents (most of the Kangyur has unparseable
  creatures).
This commit is contained in:
dchandler 2003-08-12 04:13:11 +00:00
parent 87266646fb
commit 57f506384f
5 changed files with 258 additions and 38 deletions

View file

@ -50,25 +50,28 @@ public class PackageTest extends TestCase {
public PackageTest() { }
private static void tstHelper(String acip) {
tstHelper2(acip, null, false, null, null);
tstHelper2(acip, null, false, null, null, null);
}
private static void tstHelper(String acip, String expectedPairs) {
tstHelper2(acip, expectedPairs, false, null, null);
tstHelper2(acip, expectedPairs, false, null, null, null);
}
private static void tstHelper(String acip, String[] expectedParses) {
tstHelper2(acip, null, false, expectedParses, null);
tstHelper2(acip, null, false, expectedParses, null, null);
}
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses) {
tstHelper2(acip, expectedPairs, false, expectedParses, null);
tstHelper2(acip, expectedPairs, false, expectedParses, null, null);
}
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses) {
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses);
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, null);
}
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses, String expectedBestParse) {
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse);
}
private static void tstHelper2(String acip) {
tstHelper2(acip, null);
}
private static void tstHelper2(String acip, String expectedPairs) {
tstHelper2(acip, expectedPairs, true, null, null);
tstHelper2(acip, expectedPairs, true, null, null, null);
}
private static final boolean sdebug = false;
@ -76,7 +79,8 @@ public class PackageTest extends TestCase {
String expectedPairs,
boolean debug,
String[] expectedParses,
String[] expectedLegalParses) {
String[] expectedLegalParses,
String expectedBestParse) {
TPairList l = TPairListFactory.breakACIPIntoChunks(acip);
if (sdebug || debug)
System.out.println("ACIP=" + acip + " and l'=" + l);
@ -95,6 +99,11 @@ public class PackageTest extends TestCase {
assertTrue(null == expectedParses || expectedParses.length == 0);
assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
return;
} else {
if (pt.getWarning(false, l, acip) != null) {
System.out.println(pt.getWarning(false, l, acip));
} else if (pt.getWarning(true, l, acip) != null)
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning(true, l, acip));
}
int np = pt.numberOfParses();
boolean goodness = expectedParses == null || expectedParses.length == np;
@ -136,13 +145,14 @@ public class PackageTest extends TestCase {
assertTrue(goodness2);
TStackListList allLegalParses = pt.getLegalParses();
TStackListList decentParses = pt.getNonIllegalParses();
if (legalParses.size() != 1 && true && pt.getBestParse() == null) {
if (pt.getBestParse() == null) {
if (legalParses.size() == 0) {
if (pt.getBestParse() != null) {
System.out.print("There is a best parse for the slightly rocky ACIP {" + acip + "}; ");
} else {
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
if (null != expectedBestParse && !"".equals(expectedBestParse)) {
System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
assertTrue(false);
}
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
if (decentParses.size() == 1) {
System.out.println("ACIPNoLegalParseError: NO LEGAL PARSE for the unambiguous ACIP {" + acip + "} (i.e., exactly one illegal parse exists, so it's unambiguous)");
// DLC FIXME: it's really unambiguous if one illegal parse has fewer glyphs than any other? {shthA'I} might be an example... but NO! because you go left-to-right to make stacks, except think of BRTAN vs. BRAN, they break that rule... ???? DLC ????
@ -150,12 +160,27 @@ public class PackageTest extends TestCase {
System.out.println("ACIPNoLegalParseError: NO PARSES for ACIP {" + acip + "}, decent parses are " + decentParses);
}
} else {
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
assertTrue(legalParses.size() == 2
&& (legalParses.get(0).size()
== 1 + legalParses.get(1).size()));
if (legalParses.size() > 1) {
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
assertTrue(legalParses.size() == 2
&& (legalParses.get(0).size()
== 1 + legalParses.get(1).size()));
}
}
} else {
if (legalParses.size() != 1) {
if (sdebug || debug) System.out.println("Best parse exists but there are none or two or more legal parses for ACIP {" + acip + "}");
}
if (null != expectedBestParse) {
boolean good = pt.getBestParse().equals(expectedBestParse);
if (!good) {
System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
}
assertTrue(good);
}
if (sdebug || debug) System.out.println("There is a best parse for the slightly rocky ACIP {" + acip + "}");
}
if (allLegalParses.size() != legalParses.size()) {
if (sdebug || debug)
System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses);
@ -195,13 +220,28 @@ public class PackageTest extends TestCase {
public void testPerformance() {
tstHelper("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
}
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String)},
* {@link TPairList#getACIPError()}, and {@link
* TPairList#recoverACIP()}. */
public void testBreakACIPIntoChunks() {
tstHelper("AUTPA", "{AU}{T}{PA}",
new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
new String[] { },
"{AU}{T+PA}");
tstHelper("PADMA", "{PA}{D}{MA}",
null,
null);
tstHelper("PADMA", "{PA}{D}{MA}",
new String[] { "{PA}{D}{MA}", "{PA}{D+MA}" },
new String[] { },
"{PA}{D+MA}");
tstHelper("PADMDM", "{PA}{D}{M}{D}{M}",
null,
new String[] { },
"{PA}{D+M}{D+M}");
tstHelper("GRVA'I", "{G}{R}{VA}{'I}",
new String[] { "{G}{R+VA}{'I}", "{G+R+VA}{'I}" },
new String[] { "{G+R+VA}{'I}" });
@ -1749,7 +1789,6 @@ tstHelper("CAM");
tstHelper("CAN");
tstHelper("CANG");
tstHelper("CAR");
tstHelper("CARVED");
tstHelper("CAS");
tstHelper("CE");
tstHelper("CE'AM");