The ACIP->Tibetan converter now has perfect low-level functionality,
and it can produce error messages and warnings that make sense to the user. One can now get the correct parse, if one exists, for an ACIP tsheg bar. One could even feed in ACIP and get a list of warnings about things as innocuous as PADMA, which a dumb converter would have trouble with. One could then turn that ACIP into well-behaved ACIP for that dumb converter, if one really wanted to.

Still to do:
o Scan ACIP files into tsheg bars.
o Produce TMW/Latin (from which you can get Unicode, etc.).
o E-mail the illegal tsheg bars to the ACIP fellows so they can fix the affected documents (most of the Kangyur has unparseable creatures).
This commit is contained in:
parent
87266646fb
commit
57f506384f
5 changed files with 258 additions and 38 deletions
|
@ -50,25 +50,28 @@ public class PackageTest extends TestCase {
|
||||||
|
|
||||||
public PackageTest() { }
|
public PackageTest() { }
|
||||||
private static void tstHelper(String acip) {
|
private static void tstHelper(String acip) {
|
||||||
tstHelper2(acip, null, false, null, null);
|
tstHelper2(acip, null, false, null, null, null);
|
||||||
}
|
}
|
||||||
private static void tstHelper(String acip, String expectedPairs) {
|
private static void tstHelper(String acip, String expectedPairs) {
|
||||||
tstHelper2(acip, expectedPairs, false, null, null);
|
tstHelper2(acip, expectedPairs, false, null, null, null);
|
||||||
}
|
}
|
||||||
private static void tstHelper(String acip, String[] expectedParses) {
|
private static void tstHelper(String acip, String[] expectedParses) {
|
||||||
tstHelper2(acip, null, false, expectedParses, null);
|
tstHelper2(acip, null, false, expectedParses, null, null);
|
||||||
}
|
}
|
||||||
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses) {
|
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses) {
|
||||||
tstHelper2(acip, expectedPairs, false, expectedParses, null);
|
tstHelper2(acip, expectedPairs, false, expectedParses, null, null);
|
||||||
}
|
}
|
||||||
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses) {
|
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses) {
|
||||||
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses);
|
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, null);
|
||||||
|
}
|
||||||
|
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses, String expectedBestParse) {
|
||||||
|
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse);
|
||||||
}
|
}
|
||||||
private static void tstHelper2(String acip) {
|
private static void tstHelper2(String acip) {
|
||||||
tstHelper2(acip, null);
|
tstHelper2(acip, null);
|
||||||
}
|
}
|
||||||
private static void tstHelper2(String acip, String expectedPairs) {
|
private static void tstHelper2(String acip, String expectedPairs) {
|
||||||
tstHelper2(acip, expectedPairs, true, null, null);
|
tstHelper2(acip, expectedPairs, true, null, null, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static final boolean sdebug = false;
|
private static final boolean sdebug = false;
|
||||||
|
@ -76,7 +79,8 @@ public class PackageTest extends TestCase {
|
||||||
String expectedPairs,
|
String expectedPairs,
|
||||||
boolean debug,
|
boolean debug,
|
||||||
String[] expectedParses,
|
String[] expectedParses,
|
||||||
String[] expectedLegalParses) {
|
String[] expectedLegalParses,
|
||||||
|
String expectedBestParse) {
|
||||||
TPairList l = TPairListFactory.breakACIPIntoChunks(acip);
|
TPairList l = TPairListFactory.breakACIPIntoChunks(acip);
|
||||||
if (sdebug || debug)
|
if (sdebug || debug)
|
||||||
System.out.println("ACIP=" + acip + " and l'=" + l);
|
System.out.println("ACIP=" + acip + " and l'=" + l);
|
||||||
|
@ -95,6 +99,11 @@ public class PackageTest extends TestCase {
|
||||||
assertTrue(null == expectedParses || expectedParses.length == 0);
|
assertTrue(null == expectedParses || expectedParses.length == 0);
|
||||||
assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
|
assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
|
||||||
return;
|
return;
|
||||||
|
} else {
|
||||||
|
if (pt.getWarning(false, l, acip) != null) {
|
||||||
|
System.out.println(pt.getWarning(false, l, acip));
|
||||||
|
} else if (pt.getWarning(true, l, acip) != null)
|
||||||
|
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning(true, l, acip));
|
||||||
}
|
}
|
||||||
int np = pt.numberOfParses();
|
int np = pt.numberOfParses();
|
||||||
boolean goodness = expectedParses == null || expectedParses.length == np;
|
boolean goodness = expectedParses == null || expectedParses.length == np;
|
||||||
|
@ -136,13 +145,14 @@ public class PackageTest extends TestCase {
|
||||||
assertTrue(goodness2);
|
assertTrue(goodness2);
|
||||||
TStackListList allLegalParses = pt.getLegalParses();
|
TStackListList allLegalParses = pt.getLegalParses();
|
||||||
TStackListList decentParses = pt.getNonIllegalParses();
|
TStackListList decentParses = pt.getNonIllegalParses();
|
||||||
if (legalParses.size() != 1 && true && pt.getBestParse() == null) {
|
if (pt.getBestParse() == null) {
|
||||||
if (legalParses.size() == 0) {
|
if (legalParses.size() == 0) {
|
||||||
if (pt.getBestParse() != null) {
|
if (null != expectedBestParse && !"".equals(expectedBestParse)) {
|
||||||
System.out.print("There is a best parse for the slightly rocky ACIP {" + acip + "}; ");
|
System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
|
||||||
} else {
|
assertTrue(false);
|
||||||
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
|
|
||||||
}
|
}
|
||||||
|
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
|
||||||
|
|
||||||
if (decentParses.size() == 1) {
|
if (decentParses.size() == 1) {
|
||||||
System.out.println("ACIPNoLegalParseError: NO LEGAL PARSE for the unambiguous ACIP {" + acip + "} (i.e., exactly one illegal parse exists, so it's unambiguous)");
|
System.out.println("ACIPNoLegalParseError: NO LEGAL PARSE for the unambiguous ACIP {" + acip + "} (i.e., exactly one illegal parse exists, so it's unambiguous)");
|
||||||
// DLC FIXME: it's really unambiguous if one illegal parse has fewer glyphs than any other? {shthA'I} might be an example... but NO! because you go left-to-right to make stacks, except think of BRTAN vs. BRAN, they break that rule... ???? DLC ????
|
// DLC FIXME: it's really unambiguous if one illegal parse has fewer glyphs than any other? {shthA'I} might be an example... but NO! because you go left-to-right to make stacks, except think of BRTAN vs. BRAN, they break that rule... ???? DLC ????
|
||||||
|
@ -150,12 +160,27 @@ public class PackageTest extends TestCase {
|
||||||
System.out.println("ACIPNoLegalParseError: NO PARSES for ACIP {" + acip + "}, decent parses are " + decentParses);
|
System.out.println("ACIPNoLegalParseError: NO PARSES for ACIP {" + acip + "}, decent parses are " + decentParses);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
|
if (legalParses.size() > 1) {
|
||||||
assertTrue(legalParses.size() == 2
|
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
|
||||||
&& (legalParses.get(0).size()
|
assertTrue(legalParses.size() == 2
|
||||||
== 1 + legalParses.get(1).size()));
|
&& (legalParses.get(0).size()
|
||||||
|
== 1 + legalParses.get(1).size()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
if (legalParses.size() != 1) {
|
||||||
|
if (sdebug || debug) System.out.println("Best parse exists but there are none or two or more legal parses for ACIP {" + acip + "}");
|
||||||
|
}
|
||||||
|
if (null != expectedBestParse) {
|
||||||
|
boolean good = pt.getBestParse().equals(expectedBestParse);
|
||||||
|
if (!good) {
|
||||||
|
System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
|
||||||
|
}
|
||||||
|
assertTrue(good);
|
||||||
|
}
|
||||||
|
if (sdebug || debug) System.out.println("There is a best parse for the slightly rocky ACIP {" + acip + "}");
|
||||||
}
|
}
|
||||||
|
|
||||||
if (allLegalParses.size() != legalParses.size()) {
|
if (allLegalParses.size() != legalParses.size()) {
|
||||||
if (sdebug || debug)
|
if (sdebug || debug)
|
||||||
System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses);
|
System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses);
|
||||||
|
@ -195,13 +220,28 @@ public class PackageTest extends TestCase {
|
||||||
|
|
||||||
public void testPerformance() {
|
public void testPerformance() {
|
||||||
tstHelper("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
|
tstHelper("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
|
||||||
tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
|
tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String)},
|
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String)},
|
||||||
* {@link TPairList#getACIPError()}, and {@link
|
* {@link TPairList#getACIPError()}, and {@link
|
||||||
* TPairList#recoverACIP()}. */
|
* TPairList#recoverACIP()}. */
|
||||||
public void testBreakACIPIntoChunks() {
|
public void testBreakACIPIntoChunks() {
|
||||||
|
tstHelper("AUTPA", "{AU}{T}{PA}",
|
||||||
|
new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
|
||||||
|
new String[] { },
|
||||||
|
"{AU}{T+PA}");
|
||||||
|
tstHelper("PADMA", "{PA}{D}{MA}",
|
||||||
|
null,
|
||||||
|
null);
|
||||||
|
tstHelper("PADMA", "{PA}{D}{MA}",
|
||||||
|
new String[] { "{PA}{D}{MA}", "{PA}{D+MA}" },
|
||||||
|
new String[] { },
|
||||||
|
"{PA}{D+MA}");
|
||||||
|
tstHelper("PADMDM", "{PA}{D}{M}{D}{M}",
|
||||||
|
null,
|
||||||
|
new String[] { },
|
||||||
|
"{PA}{D+M}{D+M}");
|
||||||
tstHelper("GRVA'I", "{G}{R}{VA}{'I}",
|
tstHelper("GRVA'I", "{G}{R}{VA}{'I}",
|
||||||
new String[] { "{G}{R+VA}{'I}", "{G+R+VA}{'I}" },
|
new String[] { "{G}{R+VA}{'I}", "{G+R+VA}{'I}" },
|
||||||
new String[] { "{G+R+VA}{'I}" });
|
new String[] { "{G+R+VA}{'I}" });
|
||||||
|
@ -1749,7 +1789,6 @@ tstHelper("CAM");
|
||||||
tstHelper("CAN");
|
tstHelper("CAN");
|
||||||
tstHelper("CANG");
|
tstHelper("CANG");
|
||||||
tstHelper("CAR");
|
tstHelper("CAR");
|
||||||
tstHelper("CARVED");
|
|
||||||
tstHelper("CAS");
|
tstHelper("CAS");
|
||||||
tstHelper("CE");
|
tstHelper("CE");
|
||||||
tstHelper("CE'AM");
|
tstHelper("CE'AM");
|
||||||
|
|
|
@ -54,6 +54,31 @@ class TPairList {
|
||||||
/** Returns the ith pair in this list. */
|
/** Returns the ith pair in this list. */
|
||||||
public TPair get(int i) { return (TPair)al.get(i); }
|
public TPair get(int i) { return (TPair)al.get(i); }
|
||||||
|
|
||||||
|
/** Returns the ith non-disambiguator pair in this list. This is
|
||||||
|
* O(size()). */
|
||||||
|
public TPair getNthNonDisambiguatorPair(int n) {
|
||||||
|
TPair p;
|
||||||
|
int count = 0;
|
||||||
|
for (int i = 0; i < size(); i++) {
|
||||||
|
p = get(i);
|
||||||
|
if (!p.isDisambiguator())
|
||||||
|
if (count++ == n)
|
||||||
|
return p;
|
||||||
|
}
|
||||||
|
throw new IllegalArgumentException("n, " + n + " is too big for this list of pairs, " + toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns the number of pairs in this list that are not entirely
|
||||||
|
* disambiguators. */
|
||||||
|
public int sizeMinusDisambiguators() {
|
||||||
|
int count = 0;
|
||||||
|
for (int i = 0; i < size(); i++) {
|
||||||
|
if (!get(i).isDisambiguator())
|
||||||
|
++count;
|
||||||
|
}
|
||||||
|
return count;
|
||||||
|
}
|
||||||
|
|
||||||
/** Adds p to the end of this list. */
|
/** Adds p to the end of this list. */
|
||||||
public void add(TPair p) {
|
public void add(TPair p) {
|
||||||
if (p == null || (p.getLeft() == null && p.getRight() == null))
|
if (p == null || (p.getLeft() == null && p.getRight() == null))
|
||||||
|
@ -253,11 +278,12 @@ class TPairList {
|
||||||
|
|
||||||
// DLC TEST: BA'I has exactly two syntactically legal parses but just one TStackList.
|
// DLC TEST: BA'I has exactly two syntactically legal parses but just one TStackList.
|
||||||
|
|
||||||
/** Returns a set (as as ArrayList) of all possible
|
/** Returns a set (as as ArrayList) of all possible TStackLists.
|
||||||
* TStackLists. Uses knowledge of Tibetan spelling rules
|
* Uses knowledge of Tibetan spelling rules (i.e., tsheg bar
|
||||||
* (i.e., tsheg bar syntax) to do so. If this list of pairs has
|
* syntax) to do so. If this list of pairs has something clearly
|
||||||
* something clearly illegal in it, or is empty, or is merely a
|
* illegal in it, or is empty, or is merely a list of
|
||||||
* list of disambiguators etc., then this returns null. */
|
* disambiguators etc., then this returns null. Never returns an
|
||||||
|
* empty parse tree. */
|
||||||
public TParseTree getParseTree() {
|
public TParseTree getParseTree() {
|
||||||
TParseTree pt = new TParseTree();
|
TParseTree pt = new TParseTree();
|
||||||
int sz = size();
|
int sz = size();
|
||||||
|
@ -308,7 +334,7 @@ class TPairList {
|
||||||
// give a nice error message in this case.
|
// give a nice error message in this case.
|
||||||
if (ddebug) System.out.println("ddebug: we're going to do 2^" + (j-i+1) + " [or " + (1 << (j-i+1)) + "] wacky iterations!");
|
if (ddebug) System.out.println("ddebug: we're going to do 2^" + (j-i+1) + " [or " + (1 << (j-i+1)) + "] wacky iterations!");
|
||||||
if ((j-i+1) > 13) // if you don't use 13, then change PackageTest.testSlowestTshegBar().
|
if ((j-i+1) > 13) // if you don't use 13, then change PackageTest.testSlowestTshegBar().
|
||||||
return new TParseTree();
|
return null;
|
||||||
|
|
||||||
boolean keepGoing = true;
|
boolean keepGoing = true;
|
||||||
TStackListList sll = new TStackListList();
|
TStackListList sll = new TStackListList();
|
||||||
|
@ -423,6 +449,7 @@ class TPairList {
|
||||||
// your tsheg bar.
|
// your tsheg bar.
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (pt.isEmpty()) return null;
|
||||||
return pt;
|
return pt;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -533,7 +560,8 @@ class TPairList {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (null == thislWylie) throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie);
|
if (null == thislWylie)
|
||||||
|
throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie);
|
||||||
lWylie.append(thislWylie);
|
lWylie.append(thislWylie);
|
||||||
StringBuffer ll = new StringBuffer(lWylie.toString());
|
StringBuffer ll = new StringBuffer(lWylie.toString());
|
||||||
int ww;
|
int ww;
|
||||||
|
@ -542,11 +570,11 @@ class TPairList {
|
||||||
ll.deleteCharAt(ww);
|
ll.deleteCharAt(ww);
|
||||||
boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(ll.toString());
|
boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(ll.toString());
|
||||||
boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(lWylie.toString());
|
boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(lWylie.toString());
|
||||||
if (!isTibetan && !isSanskrit && !isNumeric && true) {
|
if (ddebug && !isTibetan && !isSanskrit && !isNumeric) {
|
||||||
System.out.println("DLC: OTHER for " + lWylie + " with vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
|
System.out.println("DLC: OTHER for " + lWylie + " with vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
|
||||||
}
|
}
|
||||||
if (isTibetan && isSanskrit) isSanskrit = false; // RVA, e.g.
|
if (isTibetan && isSanskrit) isSanskrit = false; // RVA, e.g.
|
||||||
if (true && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) {
|
if (ddebug && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) {
|
||||||
System.out.println("DLC: vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
|
System.out.println("DLC: vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
|
||||||
}
|
}
|
||||||
TGCPair tp;
|
TGCPair tp;
|
||||||
|
|
|
@ -32,7 +32,7 @@ class TParseTree {
|
||||||
/** Creates an empty list. */
|
/** Creates an empty list. */
|
||||||
public TParseTree() { }
|
public TParseTree() { }
|
||||||
|
|
||||||
/** Returns the ith pair in this list. */
|
/** Returns the ith list of stack lists in this parse tree. */
|
||||||
public TStackListList get(int i) { return (TStackListList)al.get(i); }
|
public TStackListList get(int i) { return (TStackListList)al.get(i); }
|
||||||
|
|
||||||
/** Adds p to the end of this list. */
|
/** Adds p to the end of this list. */
|
||||||
|
@ -98,8 +98,8 @@ class TParseTree {
|
||||||
return sll;
|
return sll;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns a list containing the parses of this parse tree that
|
/** Returns a list (never null) containing the parses of this
|
||||||
* are not clearly illegal. */
|
* parse tree that are not clearly illegal. */
|
||||||
public TStackListList getNonIllegalParses() {
|
public TStackListList getNonIllegalParses() {
|
||||||
TStackListList sll = new TStackListList(2); // save memory
|
TStackListList sll = new TStackListList(2); // save memory
|
||||||
ParseIterator pi = getParseIterator();
|
ParseIterator pi = getParseIterator();
|
||||||
|
@ -124,12 +124,61 @@ class TParseTree {
|
||||||
TStackListList up = getUniqueParse();
|
TStackListList up = getUniqueParse();
|
||||||
if (up.size() == 1)
|
if (up.size() == 1)
|
||||||
return up.get(0);
|
return up.get(0);
|
||||||
else if (up.size() == 2) {
|
|
||||||
}
|
|
||||||
up = getNonIllegalParses();
|
up = getNonIllegalParses();
|
||||||
int sz = up.size();
|
int sz = up.size();
|
||||||
if (up.size() == 1) {
|
if (sz == 1) {
|
||||||
return up.get(0);
|
return up.get(0);
|
||||||
|
} else if (sz > 1) {
|
||||||
|
// {PADMA}, for example. Our technique is to go from the
|
||||||
|
// left and stack as much as we can. So {PA}{D}{MA} is
|
||||||
|
// inferior to {PA}{D+MA}, and {PA}{D+MA}{D}{MA} is
|
||||||
|
// inferior to {PA}{D+MA}{D+MA}. We do not look for the
|
||||||
|
// minimum number of glyphs, though -- {PA}{N+D}{B+H+R}
|
||||||
|
// and {PA}{N}{D+B+H+R} tie by that score, but the former
|
||||||
|
// is the clear winner.
|
||||||
|
|
||||||
|
// We give a warning about these, optionally, so that
|
||||||
|
// users can produce output that even a dumb ACIP reader
|
||||||
|
// can understand. See getWarning(true, ..).
|
||||||
|
|
||||||
|
// if j is in this list, then up.get(j) is still a
|
||||||
|
// potential winner.
|
||||||
|
ArrayList candidates = new ArrayList(sz);
|
||||||
|
for (int i = 0; i < sz; i++)
|
||||||
|
candidates.add(new Integer(i));
|
||||||
|
boolean keepGoing = true;
|
||||||
|
int stackNumber = 0;
|
||||||
|
boolean someoneHasThisStack = true;
|
||||||
|
while (someoneHasThisStack && candidates.size() > 1) {
|
||||||
|
// maybe none of the candidates have stackNumber+1
|
||||||
|
// stacks. If none do, we'll quit.
|
||||||
|
someoneHasThisStack = false;
|
||||||
|
int maxGlyphsInThisStack = 0;
|
||||||
|
for (int k = 0; k < candidates.size(); k++) {
|
||||||
|
TStackList sl = up.get(((Integer)candidates.get(k)).intValue());
|
||||||
|
if (sl.size() > stackNumber) {
|
||||||
|
int ng;
|
||||||
|
if ((ng = sl.get(stackNumber).size()) > maxGlyphsInThisStack)
|
||||||
|
maxGlyphsInThisStack = ng;
|
||||||
|
someoneHasThisStack = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Remove all candidates that aren't keeping up.
|
||||||
|
if (someoneHasThisStack) {
|
||||||
|
for (int k = 0; k < candidates.size(); k++) {
|
||||||
|
TStackList sl = up.get(((Integer)candidates.get(k)).intValue());
|
||||||
|
if (sl.size() > stackNumber) {
|
||||||
|
if (sl.get(stackNumber).size() != maxGlyphsInThisStack)
|
||||||
|
candidates.remove(k--);
|
||||||
|
} else throw new Error("impossible!");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
++stackNumber;
|
||||||
|
}
|
||||||
|
if (candidates.size() == 1)
|
||||||
|
return up.get(((Integer)candidates.get(0)).intValue());
|
||||||
|
else
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -161,13 +210,11 @@ class TParseTree {
|
||||||
return legalParsesWithVowelOnRoot;
|
return legalParsesWithVowelOnRoot;
|
||||||
else {
|
else {
|
||||||
if (legalParsesWithVowelOnRoot.size() == 2) {
|
if (legalParsesWithVowelOnRoot.size() == 2) {
|
||||||
// DLC is this even valid?
|
|
||||||
if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
|
if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
|
||||||
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
|
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
|
||||||
return new TStackListList(legalParsesWithVowelOnRoot.get(1));
|
return new TStackListList(legalParsesWithVowelOnRoot.get(1));
|
||||||
}
|
}
|
||||||
if (allLegalParses.size() == 2) {
|
if (allLegalParses.size() == 2) {
|
||||||
// DLC is this even valid?
|
|
||||||
if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size())
|
if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size())
|
||||||
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1));
|
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1));
|
||||||
return new TStackListList(allLegalParses.get(1));
|
return new TStackListList(allLegalParses.get(1));
|
||||||
|
@ -194,7 +241,65 @@ class TParseTree {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns null if this parse tree is perfectly legal and valid.
|
||||||
|
* Returns a warning for users otherwise. If and only if
|
||||||
|
* paranoid is true, then even unambiguous ACIP like PADMA, which
|
||||||
|
* could be improved by being written as PAD+MA, will cause a
|
||||||
|
* warning.
|
||||||
|
* @param paranoid true if you do not mind a lot of warnings
|
||||||
|
* @param pl the pair list from which this parse tree originated
|
||||||
|
* @param originalACIP the original ACIP, or null if you want
|
||||||
|
* this parse tree to make a best guess. */
|
||||||
|
public String getWarning(boolean paranoid,
|
||||||
|
TPairList pl,
|
||||||
|
String originalACIP) {
|
||||||
|
TStackListList up = getUniqueParse();
|
||||||
|
if (null == up || up.size() != 1) {
|
||||||
|
boolean isLastStack[] = new boolean[1];
|
||||||
|
TStackListList nip = getNonIllegalParses();
|
||||||
|
if (nip.size() != 1) {
|
||||||
|
if (null == getBestParse()) {
|
||||||
|
return "There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||||
|
} else {
|
||||||
|
if (getBestParse().hasStackWithoutVowel(pl, isLastStack)) {
|
||||||
|
if (isLastStack[0]) {
|
||||||
|
return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||||
|
} else {
|
||||||
|
return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (paranoid) {
|
||||||
|
return "Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) {
|
||||||
|
if (isLastStack[0]) {
|
||||||
|
return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||||
|
} else {
|
||||||
|
return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Returns something akin to the ACIP input (okay, maybe 1-2-3-4
|
||||||
|
* instead of 1234, and maybe AUTPA instead of AUT-PA)
|
||||||
|
* corresponding to this parse tree. */
|
||||||
|
public String recoverACIP() {
|
||||||
|
ParseIterator pi = getParseIterator();
|
||||||
|
if (pi.hasNext()) {
|
||||||
|
return pi.next().recoverACIP();
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns a hashCode appropriate for use with our {@link
|
/** Returns a hashCode appropriate for use with our {@link
|
||||||
* #equals(Object)} method. */
|
* #equals(Object)} method. */
|
||||||
public int hashCode() { return al.hashCode(); }
|
public int hashCode() { return al.hashCode(); }
|
||||||
|
|
||||||
|
/** Returns true if and only if this parse tree is empty. */
|
||||||
|
public boolean isEmpty() { return al.isEmpty(); }
|
||||||
}
|
}
|
||||||
|
|
|
@ -69,15 +69,25 @@ class TStackList {
|
||||||
/** Returns true if and only if this list is empty. */
|
/** Returns true if and only if this list is empty. */
|
||||||
public boolean isEmpty() { return al.isEmpty(); }
|
public boolean isEmpty() { return al.isEmpty(); }
|
||||||
|
|
||||||
|
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234)
|
||||||
|
* corresponding to this stack list. */
|
||||||
|
public String recoverACIP() {
|
||||||
|
return toStringHelper(false);
|
||||||
|
}
|
||||||
|
|
||||||
/** Returns a human-readable representation like {G}{YA} or
|
/** Returns a human-readable representation like {G}{YA} or
|
||||||
* {GYA}. */
|
* {GYA}. */
|
||||||
public String toString() {
|
public String toString() {
|
||||||
|
return toStringHelper(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String toStringHelper(boolean brackets) {
|
||||||
int sz = size();
|
int sz = size();
|
||||||
StringBuffer b = new StringBuffer();
|
StringBuffer b = new StringBuffer();
|
||||||
for (int i = 0; i < sz; i++) {
|
for (int i = 0; i < sz; i++) {
|
||||||
b.append('{');
|
if (brackets) b.append('{');
|
||||||
b.append(get(i).recoverACIP());
|
b.append(get(i).recoverACIP());
|
||||||
b.append('}');
|
if (brackets) b.append('}');
|
||||||
}
|
}
|
||||||
return b.toString();
|
return b.toString();
|
||||||
}
|
}
|
||||||
|
@ -152,18 +162,49 @@ class TStackList {
|
||||||
return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot);
|
return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static final boolean ddebug = false;
|
||||||
|
|
||||||
/** Returns true if and only if this stack list contains a clearly
|
/** Returns true if and only if this stack list contains a clearly
|
||||||
* illegal construct, such as an TPair (V . something). */
|
* illegal construct, such as an TPair (V . something). */
|
||||||
boolean isClearlyIllegal() {
|
boolean isClearlyIllegal() {
|
||||||
// check for {D}{VA} sorts of things:
|
// check for {D}{VA} sorts of things:
|
||||||
for (int i = 0; i < size(); i++) {
|
for (int i = 0; i < size(); i++) {
|
||||||
if (get(i).getACIPError() != null) {
|
if (get(i).getACIPError() != null) {
|
||||||
System.out.println("DLC: error is " + get(i).getACIPError());
|
if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Returns true if and only if this stack list contains a stack
|
||||||
|
* that does not end in a vowel or disambiguator. Note that this
|
||||||
|
* is not erroneous for legal Tibetan like {BRTAN}, where {B} has
|
||||||
|
* no vowel, but it is a warning sign for Sanskrit stacks.
|
||||||
|
* @param opl the pair list from which this stack list
|
||||||
|
* originated
|
||||||
|
* @param isLastStack if non-null, then isLastStack[0] will be
|
||||||
|
* set to true if and only if the very last stack is the only
|
||||||
|
* stack not to have a vowel or disambiguator on it */
|
||||||
|
boolean hasStackWithoutVowel(TPairList opl, boolean[] isLastStack) {
|
||||||
|
int runningSize = 0;
|
||||||
|
for (int i = 0; i < size(); i++) {
|
||||||
|
TPairList pl = get(i);
|
||||||
|
String l;
|
||||||
|
TPair lastPair = opl.getNthNonDisambiguatorPair(runningSize + pl.size() - 1);
|
||||||
|
runningSize += pl.size();
|
||||||
|
if (null == lastPair.getRight()
|
||||||
|
&& !((l = lastPair.getLeft()) != null && l.length() == 1
|
||||||
|
&& l.charAt(0) >= '0' && l.charAt(0) <= '9')) {
|
||||||
|
if (null != isLastStack)
|
||||||
|
isLastStack[0] = (i + 1 == size());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (runningSize != opl.sizeMinusDisambiguators())
|
||||||
|
throw new IllegalArgumentException("opl (" + opl + ") is bad for this stack list (" + toString() + ")");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class BoolPair {
|
class BoolPair {
|
||||||
|
|
|
@ -83,4 +83,11 @@ class TStackListList {
|
||||||
* iterating and you'll have to read the code to know what will
|
* iterating and you'll have to read the code to know what will
|
||||||
* happen. */
|
* happen. */
|
||||||
public ListIterator listIterator() { return al.listIterator(); }
|
public ListIterator listIterator() { return al.listIterator(); }
|
||||||
|
|
||||||
|
/** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234)
|
||||||
|
* corresponding to this stack list list. */
|
||||||
|
public String recoverACIP() {
|
||||||
|
if (isEmpty()) return null;
|
||||||
|
return get(0).recoverACIP();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue