The ACIP->Tibetan converter now has perfect low-level functionality,
and it has the capability to produce error messages and warnings that
make sense to the user.  One can now get the correct parse, if one
exists, for an ACIP tsheg bar.

One could even feed in ACIP and get a list of warnings about things as
innocuous as PADMA, which a dumb converter would have trouble with.
One could then turn ACIP into well-behaved ACIP for that dumb
converter, if one really wanted to.

Still to do:

o Scan ACIP files into tsheg bars.
o Produce TMW/Latin (from which you can get Unicode, etc.).
o E-mail the illegal tsheg bars to the ACIP fellows so they can fix
  the affected documents (most of the Kangyur has unparseable
  creatures).
This commit is contained in:
dchandler 2003-08-12 04:13:11 +00:00
parent 87266646fb
commit 57f506384f
5 changed files with 258 additions and 38 deletions

View file

@ -50,25 +50,28 @@ public class PackageTest extends TestCase {
public PackageTest() { }
private static void tstHelper(String acip) {
tstHelper2(acip, null, false, null, null);
tstHelper2(acip, null, false, null, null, null);
}
private static void tstHelper(String acip, String expectedPairs) {
tstHelper2(acip, expectedPairs, false, null, null);
tstHelper2(acip, expectedPairs, false, null, null, null);
}
private static void tstHelper(String acip, String[] expectedParses) {
tstHelper2(acip, null, false, expectedParses, null);
tstHelper2(acip, null, false, expectedParses, null, null);
}
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses) {
tstHelper2(acip, expectedPairs, false, expectedParses, null);
tstHelper2(acip, expectedPairs, false, expectedParses, null, null);
}
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses) {
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses);
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, null);
}
private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses, String expectedBestParse) {
tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse);
}
private static void tstHelper2(String acip) {
tstHelper2(acip, null);
}
private static void tstHelper2(String acip, String expectedPairs) {
tstHelper2(acip, expectedPairs, true, null, null);
tstHelper2(acip, expectedPairs, true, null, null, null);
}
private static final boolean sdebug = false;
@ -76,7 +79,8 @@ public class PackageTest extends TestCase {
String expectedPairs,
boolean debug,
String[] expectedParses,
String[] expectedLegalParses) {
String[] expectedLegalParses,
String expectedBestParse) {
TPairList l = TPairListFactory.breakACIPIntoChunks(acip);
if (sdebug || debug)
System.out.println("ACIP=" + acip + " and l'=" + l);
@ -95,6 +99,11 @@ public class PackageTest extends TestCase {
assertTrue(null == expectedParses || expectedParses.length == 0);
assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0);
return;
} else {
if (pt.getWarning(false, l, acip) != null) {
System.out.println(pt.getWarning(false, l, acip));
} else if (pt.getWarning(true, l, acip) != null)
if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning(true, l, acip));
}
int np = pt.numberOfParses();
boolean goodness = expectedParses == null || expectedParses.length == np;
@ -136,13 +145,14 @@ public class PackageTest extends TestCase {
assertTrue(goodness2);
TStackListList allLegalParses = pt.getLegalParses();
TStackListList decentParses = pt.getNonIllegalParses();
if (legalParses.size() != 1 && true && pt.getBestParse() == null) {
if (pt.getBestParse() == null) {
if (legalParses.size() == 0) {
if (pt.getBestParse() != null) {
System.out.print("There is a best parse for the slightly rocky ACIP {" + acip + "}; ");
} else {
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
if (null != expectedBestParse && !"".equals(expectedBestParse)) {
System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
assertTrue(false);
}
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
if (decentParses.size() == 1) {
System.out.println("ACIPNoLegalParseError: NO LEGAL PARSE for the unambiguous ACIP {" + acip + "} (i.e., exactly one illegal parse exists, so it's unambiguous)");
// DLC FIXME: it's really unambiguous if one illegal parse has fewer glyphs than any other? {shthA'I} might be an example... but NO! because you go left-to-right to make stacks, except think of BRTAN vs. BRAN, they break that rule... ???? DLC ????
@ -150,12 +160,27 @@ public class PackageTest extends TestCase {
System.out.println("ACIPNoLegalParseError: NO PARSES for ACIP {" + acip + "}, decent parses are " + decentParses);
}
} else {
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
assertTrue(legalParses.size() == 2
&& (legalParses.get(0).size()
== 1 + legalParses.get(1).size()));
if (legalParses.size() > 1) {
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
assertTrue(legalParses.size() == 2
&& (legalParses.get(0).size()
== 1 + legalParses.get(1).size()));
}
}
} else {
if (legalParses.size() != 1) {
if (sdebug || debug) System.out.println("Best parse exists but there are none or two or more legal parses for ACIP {" + acip + "}");
}
if (null != expectedBestParse) {
boolean good = pt.getBestParse().equals(expectedBestParse);
if (!good) {
System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
}
assertTrue(good);
}
if (sdebug || debug) System.out.println("There is a best parse for the slightly rocky ACIP {" + acip + "}");
}
if (allLegalParses.size() != legalParses.size()) {
if (sdebug || debug)
System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses);
@ -195,13 +220,28 @@ public class PackageTest extends TestCase {
public void testPerformance() {
tstHelper("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX");
tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
89012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567
8901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678");
}
/** Tests {@link TPairListFactory#breakACIPIntoChunks(String)},
* {@link TPairList#getACIPError()}, and {@link
* TPairList#recoverACIP()}. */
public void testBreakACIPIntoChunks() {
tstHelper("AUTPA", "{AU}{T}{PA}",
new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
new String[] { },
"{AU}{T+PA}");
tstHelper("PADMA", "{PA}{D}{MA}",
null,
null);
tstHelper("PADMA", "{PA}{D}{MA}",
new String[] { "{PA}{D}{MA}", "{PA}{D+MA}" },
new String[] { },
"{PA}{D+MA}");
tstHelper("PADMDM", "{PA}{D}{M}{D}{M}",
null,
new String[] { },
"{PA}{D+M}{D+M}");
tstHelper("GRVA'I", "{G}{R}{VA}{'I}",
new String[] { "{G}{R+VA}{'I}", "{G+R+VA}{'I}" },
new String[] { "{G+R+VA}{'I}" });
@ -1749,7 +1789,6 @@ tstHelper("CAM");
tstHelper("CAN");
tstHelper("CANG");
tstHelper("CAR");
tstHelper("CARVED");
tstHelper("CAS");
tstHelper("CE");
tstHelper("CE'AM");

View file

@ -54,6 +54,31 @@ class TPairList {
/** Returns the ith pair in this list. */
public TPair get(int i) { return (TPair)al.get(i); }
/** Finds the pair at zero-based position n, counting only those
 *  pairs of this list for which isDisambiguator() is false.  The
 *  list is scanned from the front, so the cost grows with size().
 *  @param n zero-based index over the non-disambiguator pairs only
 *  @return the nth non-disambiguator pair
 *  @throws IllegalArgumentException if this list does not contain
 *  more than n non-disambiguator pairs (a negative n never matches
 *  and so also ends up here) */
public TPair getNthNonDisambiguatorPair(int n) {
    // Number of non-disambiguator pairs already passed over.
    int skipped = 0;
    int idx = 0;
    while (idx < size()) {
        TPair candidate = get(idx++);
        if (candidate.isDisambiguator())
            continue;
        if (skipped == n)
            return candidate;
        skipped++;
    }
    throw new IllegalArgumentException("n, " + n + " is too big for this list of pairs, " + toString());
}
/** Counts the pairs in this list for which isDisambiguator() is
 *  false, i.e. the pairs that are not entirely disambiguators.
 *  @return size() minus the number of disambiguator pairs */
public int sizeMinusDisambiguators() {
    int kept = 0;
    int idx = 0;
    while (idx < size()) {
        if (get(idx).isDisambiguator()) {
            // disambiguators do not contribute to the total
        } else {
            kept++;
        }
        idx++;
    }
    return kept;
}
/** Adds p to the end of this list. */
public void add(TPair p) {
if (p == null || (p.getLeft() == null && p.getRight() == null))
@ -253,11 +278,12 @@ class TPairList {
// DLC TEST: BA'I has exactly two syntactically legal parses but just one TStackList.
/** Returns a set (as an ArrayList) of all possible
* TStackLists. Uses knowledge of Tibetan spelling rules
* (i.e., tsheg bar syntax) to do so. If this list of pairs has
* something clearly illegal in it, or is empty, or is merely a
* list of disambiguators etc., then this returns null. */
/** Returns a set (as an ArrayList) of all possible TStackLists.
* Uses knowledge of Tibetan spelling rules (i.e., tsheg bar
* syntax) to do so. If this list of pairs has something clearly
* illegal in it, or is empty, or is merely a list of
* disambiguators etc., then this returns null. Never returns an
* empty parse tree. */
public TParseTree getParseTree() {
TParseTree pt = new TParseTree();
int sz = size();
@ -308,7 +334,7 @@ class TPairList {
// give a nice error message in this case.
if (ddebug) System.out.println("ddebug: we're going to do 2^" + (j-i+1) + " [or " + (1 << (j-i+1)) + "] wacky iterations!");
if ((j-i+1) > 13) // if you don't use 13, then change PackageTest.testSlowestTshegBar().
return new TParseTree();
return null;
boolean keepGoing = true;
TStackListList sll = new TStackListList();
@ -423,6 +449,7 @@ class TPairList {
// your tsheg bar.
}
}
if (pt.isEmpty()) return null;
return pt;
}
@ -533,7 +560,8 @@ class TPairList {
}
}
if (null == thislWylie) throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie);
if (null == thislWylie)
throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie);
lWylie.append(thislWylie);
StringBuffer ll = new StringBuffer(lWylie.toString());
int ww;
@ -542,11 +570,11 @@ class TPairList {
ll.deleteCharAt(ww);
boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(ll.toString());
boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(lWylie.toString());
if (!isTibetan && !isSanskrit && !isNumeric && true) {
if (ddebug && !isTibetan && !isSanskrit && !isNumeric) {
System.out.println("DLC: OTHER for " + lWylie + " with vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
}
if (isTibetan && isSanskrit) isSanskrit = false; // RVA, e.g.
if (true && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) {
if (ddebug && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) {
System.out.println("DLC: vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
}
TGCPair tp;

View file

@ -32,7 +32,7 @@ class TParseTree {
/** Creates an empty list. */
public TParseTree() { }
/** Returns the ith pair in this list. */
/** Returns the ith list of stack lists in this parse tree. */
public TStackListList get(int i) { return (TStackListList)al.get(i); }
/** Adds p to the end of this list. */
@ -98,8 +98,8 @@ class TParseTree {
return sll;
}
/** Returns a list containing the parses of this parse tree that
* are not clearly illegal. */
/** Returns a list (never null) containing the parses of this
* parse tree that are not clearly illegal. */
public TStackListList getNonIllegalParses() {
TStackListList sll = new TStackListList(2); // save memory
ParseIterator pi = getParseIterator();
@ -124,12 +124,61 @@ class TParseTree {
TStackListList up = getUniqueParse();
if (up.size() == 1)
return up.get(0);
else if (up.size() == 2) {
}
up = getNonIllegalParses();
int sz = up.size();
if (up.size() == 1) {
if (sz == 1) {
return up.get(0);
} else if (sz > 1) {
// {PADMA}, for example. Our technique is to go from the
// left and stack as much as we can. So {PA}{D}{MA} is
// inferior to {PA}{D+MA}, and {PA}{D+MA}{D}{MA} is
// inferior to {PA}{D+MA}{D+MA}. We do not look for the
// minimum number of glyphs, though -- {PA}{N+D}{B+H+R}
// and {PA}{N}{D+B+H+R} tie by that score, but the former
// is the clear winner.
// We give a warning about these, optionally, so that
// users can produce output that even a dumb ACIP reader
// can understand. See getWarning(true, ..).
// if j is in this list, then up.get(j) is still a
// potential winner.
ArrayList candidates = new ArrayList(sz);
for (int i = 0; i < sz; i++)
candidates.add(new Integer(i));
boolean keepGoing = true;
int stackNumber = 0;
boolean someoneHasThisStack = true;
while (someoneHasThisStack && candidates.size() > 1) {
// maybe none of the candidates have stackNumber+1
// stacks. If none do, we'll quit.
someoneHasThisStack = false;
int maxGlyphsInThisStack = 0;
for (int k = 0; k < candidates.size(); k++) {
TStackList sl = up.get(((Integer)candidates.get(k)).intValue());
if (sl.size() > stackNumber) {
int ng;
if ((ng = sl.get(stackNumber).size()) > maxGlyphsInThisStack)
maxGlyphsInThisStack = ng;
someoneHasThisStack = true;
}
}
// Remove all candidates that aren't keeping up.
if (someoneHasThisStack) {
for (int k = 0; k < candidates.size(); k++) {
TStackList sl = up.get(((Integer)candidates.get(k)).intValue());
if (sl.size() > stackNumber) {
if (sl.get(stackNumber).size() != maxGlyphsInThisStack)
candidates.remove(k--);
} else throw new Error("impossible!");
}
}
++stackNumber;
}
if (candidates.size() == 1)
return up.get(((Integer)candidates.get(0)).intValue());
else
return null;
}
return null;
}
@ -161,13 +210,11 @@ class TParseTree {
return legalParsesWithVowelOnRoot;
else {
if (legalParsesWithVowelOnRoot.size() == 2) {
// DLC is this even valid?
if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
return new TStackListList(legalParsesWithVowelOnRoot.get(1));
}
if (allLegalParses.size() == 2) {
// DLC is this even valid?
if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size())
throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1));
return new TStackListList(allLegalParses.get(1));
@ -194,7 +241,65 @@ class TParseTree {
return false;
}
/** Produces a warning for users about this parse tree, or null when
 *  there is nothing to report.  The decision goes as follows:
 *  <ul>
 *  <li>If getUniqueParse() yields exactly one parse, null is
 *      returned.</li>
 *  <li>Otherwise, if there is exactly one non-illegal parse, a
 *      warning is returned only when that parse has a stack without
 *      a vowel.</li>
 *  <li>Otherwise, if getBestParse() is null, a message saying that
 *      no unique, non-illegal parse exists is returned.</li>
 *  <li>Otherwise the best parse is checked for a stack without a
 *      vowel; failing that, and if and only if paranoid is true,
 *      even unambiguous ACIP like PADMA (which could be improved by
 *      being written as PAD+MA) causes a warning.</li>
 *  </ul>
 *  @param paranoid true if you do not mind a lot of warnings
 *  @param pl the pair list from which this parse tree originated
 *  @param originalACIP the original ACIP, or null if you want this
 *  parse tree to make a best guess via recoverACIP()
 *  @return the warning text, or null if no warning applies */
public String getWarning(boolean paranoid,
                         TPairList pl,
                         String originalACIP) {
    TStackListList unique = getUniqueParse();
    if (null != unique && unique.size() == 1)
        return null;

    boolean[] lastStackFlag = new boolean[1];
    TStackListList nonIllegal = getNonIllegalParses();

    if (nonIllegal.size() == 1) {
        // Exactly one parse that is not clearly illegal: only a
        // vowel-less stack is worth mentioning.
        if (nonIllegal.get(0).hasStackWithoutVowel(pl, lastStackFlag))
            return vowellessStackWarning(lastStackFlag[0], originalACIP);
        return null;
    }

    if (null == getBestParse())
        return "There's not even a unique, non-illegal parse for ACIP {" + acipForWarning(originalACIP) + "}";

    if (getBestParse().hasStackWithoutVowel(pl, lastStackFlag))
        return vowellessStackWarning(lastStackFlag[0], originalACIP);

    if (paranoid)
        return "Though the ACIP {" + acipForWarning(originalACIP) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";

    return null;
}

/** Picks the ACIP text to quote in a warning: the caller's original
 *  ACIP when given, else whatever recoverACIP() reconstructs. */
private String acipForWarning(String originalACIP) {
    if (null != originalACIP)
        return originalACIP;
    return recoverACIP();
}

/** Builds the warning for a stack that has no vowel; the wording
 *  depends on whether the offending stack is the last one. */
private String vowellessStackWarning(boolean offenderIsLastStack,
                                     String originalACIP) {
    if (offenderIsLastStack)
        return "Warning: The last stack does not have a vowel in the ACIP {" + acipForWarning(originalACIP) + "}";
    return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + acipForWarning(originalACIP) + "}";
}
/** Reconstructs something akin to the ACIP input for this parse
 *  tree (okay, maybe 1-2-3-4 instead of 1234, and maybe AUTPA
 *  instead of AUT-PA).  The text is taken from the first parse that
 *  getParseIterator() offers.
 *  @return the recovered ACIP, or null if the iterator has no
 *  parses at all */
public String recoverACIP() {
    ParseIterator parses = getParseIterator();
    return parses.hasNext() ? parses.next().recoverACIP() : null;
}
/** Computes a hash code by delegating to the backing list, so that
 *  it is appropriate for use with our {@link #equals(Object)}
 *  method.
 *  @return the hash code of the backing list */
public int hashCode() {
    final int listHash = al.hashCode();
    return listHash;
}
/** Tells whether this parse tree holds no elements at all.
 *  @return true if and only if the backing list is empty */
public boolean isEmpty() {
    final boolean nothingStored = al.isEmpty();
    return nothingStored;
}
}

View file

@ -69,15 +69,25 @@ class TStackList {
/** Returns true if and only if this list is empty. */
public boolean isEmpty() { return al.isEmpty(); }
/** Reconstructs the ACIP input (okay, maybe 1-2-3-4 instead of
 *  1234) corresponding to this stack list.  This is the
 *  toStringHelper rendering with its brackets flag switched off.
 *  @return the recovered ACIP text */
public String recoverACIP() {
    final boolean withBrackets = false;
    return toStringHelper(withBrackets);
}
/** Renders this stack list for human readers, like {G}{YA} or
 *  {GYA}.  This is the toStringHelper rendering with its brackets
 *  flag switched on.
 *  @return the bracketed representation */
public String toString() {
    final boolean withBrackets = true;
    return toStringHelper(withBrackets);
}
private String toStringHelper(boolean brackets) {
int sz = size();
StringBuffer b = new StringBuffer();
for (int i = 0; i < sz; i++) {
b.append('{');
if (brackets) b.append('{');
b.append(get(i).recoverACIP());
b.append('}');
if (brackets) b.append('}');
}
return b.toString();
}
@ -152,18 +162,49 @@ class TStackList {
return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot);
}
private static final boolean ddebug = false;
/** Returns true if and only if this stack list contains a clearly
* illegal construct, such as an TPair (V . something). */
boolean isClearlyIllegal() {
// check for {D}{VA} sorts of things:
for (int i = 0; i < size(); i++) {
if (get(i).getACIPError() != null) {
System.out.println("DLC: error is " + get(i).getACIPError());
if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError());
return true;
}
}
return false;
}
/** Reports whether this stack list contains a stack that does not
 *  end in a vowel or disambiguator.  Note that this is not
 *  erroneous for legal Tibetan like {BRTAN}, where {B} has no
 *  vowel, but it is a warning sign for Sanskrit stacks.
 *
 *  <p>Concretely, for each stack in order, the last pair belonging
 *  to it is looked up in opl (counting only non-disambiguator
 *  pairs).  The stack is flagged when that pair's right side is
 *  null, unless its left side is a single character in the range
 *  '0' through '9'.  The scan stops at the first flagged stack.
 *
 *  @param opl the pair list from which this stack list originated
 *  @param isLastStack if non-null, then isLastStack[0] is written
 *  when (and only when) this method returns true: it becomes true
 *  if the flagged stack is the very last stack, false otherwise.
 *  It is left untouched when this method returns false.
 *  @return true if some stack was flagged, false if none was
 *  @throws IllegalArgumentException if no stack was flagged and the
 *  pair counts of the stacks do not add up to
 *  opl.sizeMinusDisambiguators(); the lookup in opl may also throw
 *  it when opl has too few pairs */
boolean hasStackWithoutVowel(TPairList opl, boolean[] isLastStack) {
    // Running total of pairs in the stacks visited so far; it
    // doubles as the offset of the next stack's pairs within opl.
    int pairsConsumed = 0;
    int stackIdx = 0;
    while (stackIdx < size()) {
        pairsConsumed += get(stackIdx).size();
        TPair tail = opl.getNthNonDisambiguatorPair(pairsConsumed - 1);
        if (null == tail.getRight()) {
            String left = tail.getLeft();
            boolean loneDigit = (left != null
                                 && left.length() == 1
                                 && left.charAt(0) >= '0'
                                 && left.charAt(0) <= '9');
            if (!loneDigit) {
                if (null != isLastStack)
                    isLastStack[0] = (stackIdx + 1 == size());
                return true;
            }
        }
        stackIdx++;
    }
    // Sanity check: every non-disambiguator pair of opl must have
    // been accounted for by exactly one stack.
    if (pairsConsumed != opl.sizeMinusDisambiguators())
        throw new IllegalArgumentException("opl (" + opl + ") is bad for this stack list (" + toString() + ")");
    return false;
}
}
class BoolPair {

View file

@ -83,4 +83,11 @@ class TStackListList {
* iterating and you'll have to read the code to know what will
* happen. */
public ListIterator listIterator() { return al.listIterator(); }
/** Reconstructs the ACIP input (okay, maybe 1-2-3-4 instead of
 *  1234) corresponding to this stack list list, using its first
 *  element.
 *  @return the first element's recovered ACIP, or null if this
 *  list is empty */
public String recoverACIP() {
    return isEmpty() ? null : get(0).recoverACIP();
}
}