diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index 3521c5f..ed83876 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -50,25 +50,28 @@ public class PackageTest extends TestCase { public PackageTest() { } private static void tstHelper(String acip) { - tstHelper2(acip, null, false, null, null); + tstHelper2(acip, null, false, null, null, null); } private static void tstHelper(String acip, String expectedPairs) { - tstHelper2(acip, expectedPairs, false, null, null); + tstHelper2(acip, expectedPairs, false, null, null, null); } private static void tstHelper(String acip, String[] expectedParses) { - tstHelper2(acip, null, false, expectedParses, null); + tstHelper2(acip, null, false, expectedParses, null, null); } private static void tstHelper(String acip, String expectedPairs, String[] expectedParses) { - tstHelper2(acip, expectedPairs, false, expectedParses, null); + tstHelper2(acip, expectedPairs, false, expectedParses, null, null); } private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses) { - tstHelper2(acip, expectedPairs, false, expectedParses, legalParses); + tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, null); + } + private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses, String expectedBestParse) { + tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse); } private static void tstHelper2(String acip) { tstHelper2(acip, null); } private static void tstHelper2(String acip, String expectedPairs) { - tstHelper2(acip, expectedPairs, true, null, null); + tstHelper2(acip, expectedPairs, true, null, null, null); } private static final boolean sdebug = false; @@ -76,7 +79,8 @@ public class PackageTest extends TestCase { String expectedPairs, boolean debug, String[] expectedParses, - 
String[] expectedLegalParses) { + String[] expectedLegalParses, + String expectedBestParse) { TPairList l = TPairListFactory.breakACIPIntoChunks(acip); if (sdebug || debug) System.out.println("ACIP=" + acip + " and l'=" + l); @@ -95,6 +99,11 @@ public class PackageTest extends TestCase { assertTrue(null == expectedParses || expectedParses.length == 0); assertTrue(null == expectedLegalParses || expectedLegalParses.length == 0); return; + } else { + if (pt.getWarning(false, l, acip) != null) { + System.out.println(pt.getWarning(false, l, acip)); + } else if (pt.getWarning(true, l, acip) != null) + if (sdebug || debug) System.out.println("Paranoiac warning is this: " + pt.getWarning(true, l, acip)); } int np = pt.numberOfParses(); boolean goodness = expectedParses == null || expectedParses.length == np; @@ -136,13 +145,14 @@ public class PackageTest extends TestCase { assertTrue(goodness2); TStackListList allLegalParses = pt.getLegalParses(); TStackListList decentParses = pt.getNonIllegalParses(); - if (legalParses.size() != 1 && true && pt.getBestParse() == null) { + if (pt.getBestParse() == null) { if (legalParses.size() == 0) { - if (pt.getBestParse() != null) { - System.out.print("There is a best parse for the slightly rocky ACIP {" + acip + "}; "); - } else { - System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; "); + if (null != expectedBestParse && !"".equals(expectedBestParse)) { + System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}"); + assertTrue(false); } + System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; "); + if (decentParses.size() == 1) { System.out.println("ACIPNoLegalParseError: NO LEGAL PARSE for the unambiguous ACIP {" + acip + "} (i.e., exactly one illegal parse exists, so it's unambiguous)"); // DLC FIXME: it's really unambiguous if one illegal parse has fewer glyphs than any 
other? {shthA'I} might be an example... but NO! because you go left-to-right to make stacks, except think of BRTAN vs. BRAN, they break that rule... ???? DLC ???? @@ -150,12 +160,27 @@ public class PackageTest extends TestCase { System.out.println("ACIPNoLegalParseError: NO PARSES for ACIP {" + acip + "}, decent parses are " + decentParses); } } else { - System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses); - assertTrue(legalParses.size() == 2 - && (legalParses.get(0).size() - == 1 + legalParses.get(1).size())); + if (legalParses.size() > 1) { + System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses); + assertTrue(legalParses.size() == 2 + && (legalParses.get(0).size() + == 1 + legalParses.get(1).size())); + } } + } else { + if (legalParses.size() != 1) { + if (sdebug || debug) System.out.println("Best parse exists but there are none or two or more legal parses for ACIP {" + acip + "}"); + } + if (null != expectedBestParse) { + boolean good = pt.getBestParse().equals(expectedBestParse); + if (!good) { + System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}"); + } + assertTrue(good); + } + if (sdebug || debug) System.out.println("There is a best parse for the slightly rocky ACIP {" + acip + "}"); } + if (allLegalParses.size() != legalParses.size()) { if (sdebug || debug) System.out.println("allLegalParses are " + allLegalParses + " and legalParses are " + legalParses); @@ -195,13 +220,28 @@ public class PackageTest extends TestCase { public void testPerformance() { 
tstHelper("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789
0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"); + tstHelper("90123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
34567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"); } /** Tests {@link TPairListFactory#breakACIPIntoChunks(String)}, * {@link TPairList#getACIPError()}, and {@link * TPairList#recoverACIP()}. 
*/ public void testBreakACIPIntoChunks() { + tstHelper("AUTPA", "{AU}{T}{PA}", + new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" }, + new String[] { }, + "{AU}{T+PA}"); + tstHelper("PADMA", "{PA}{D}{MA}", + null, + null); + tstHelper("PADMA", "{PA}{D}{MA}", + new String[] { "{PA}{D}{MA}", "{PA}{D+MA}" }, + new String[] { }, + "{PA}{D+MA}"); + tstHelper("PADMDM", "{PA}{D}{M}{D}{M}", + null, + new String[] { }, + "{PA}{D+M}{D+M}"); tstHelper("GRVA'I", "{G}{R}{VA}{'I}", new String[] { "{G}{R+VA}{'I}", "{G+R+VA}{'I}" }, new String[] { "{G+R+VA}{'I}" }); @@ -1749,7 +1789,6 @@ tstHelper("CAM"); tstHelper("CAN"); tstHelper("CANG"); tstHelper("CAR"); -tstHelper("CARVED"); tstHelper("CAS"); tstHelper("CE"); tstHelper("CE'AM"); diff --git a/source/org/thdl/tib/text/ttt/TPairList.java b/source/org/thdl/tib/text/ttt/TPairList.java index 1b0dd23..5169142 100644 --- a/source/org/thdl/tib/text/ttt/TPairList.java +++ b/source/org/thdl/tib/text/ttt/TPairList.java @@ -54,6 +54,31 @@ class TPairList { /** Returns the ith pair in this list. */ public TPair get(int i) { return (TPair)al.get(i); } + /** Returns the nth (zero-based) non-disambiguator pair in this list. This is + * O(size()). */ + public TPair getNthNonDisambiguatorPair(int n) { + TPair p; + int count = 0; + for (int i = 0; i < size(); i++) { + p = get(i); + if (!p.isDisambiguator()) + if (count++ == n) + return p; + } + throw new IllegalArgumentException("n, " + n + " is too big for this list of pairs, " + toString()); + } + + /** Returns the number of pairs in this list that are not entirely + * disambiguators. */ + public int sizeMinusDisambiguators() { + int count = 0; + for (int i = 0; i < size(); i++) { + if (!get(i).isDisambiguator()) + ++count; + } + return count; + } + /** Adds p to the end of this list. */ public void add(TPair p) { if (p == null || (p.getLeft() == null && p.getRight() == null)) @@ -253,11 +278,12 @@ class TPairList { // DLC TEST: BA'I has exactly two syntactically legal parses but just one TStackList. 
- /** Returns a set (as as ArrayList) of all possible - * TStackLists. Uses knowledge of Tibetan spelling rules - * (i.e., tsheg bar syntax) to do so. If this list of pairs has - * something clearly illegal in it, or is empty, or is merely a - * list of disambiguators etc., then this returns null. */ + /** Returns a set (as an ArrayList) of all possible TStackLists. + * Uses knowledge of Tibetan spelling rules (i.e., tsheg bar + * syntax) to do so. If this list of pairs has something clearly + * illegal in it, or is empty, or is merely a list of + * disambiguators etc., then this returns null. Never returns an + * empty parse tree. */ public TParseTree getParseTree() { TParseTree pt = new TParseTree(); int sz = size(); @@ -308,7 +334,7 @@ class TPairList { // give a nice error message in this case. if (ddebug) System.out.println("ddebug: we're going to do 2^" + (j-i+1) + " [or " + (1 << (j-i+1)) + "] wacky iterations!"); if ((j-i+1) > 13) // if you don't use 13, then change PackageTest.testSlowestTshegBar(). - return new TParseTree(); + return null; boolean keepGoing = true; TStackListList sll = new TStackListList(); @@ -423,6 +449,7 @@ class TPairList { // your tsheg bar. 
} } + if (pt.isEmpty()) return null; return pt; } @@ -533,7 +560,8 @@ class TPairList { } } - if (null == thislWylie) throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie); + if (null == thislWylie) + throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie); lWylie.append(thislWylie); StringBuffer ll = new StringBuffer(lWylie.toString()); int ww; @@ -542,11 +570,11 @@ class TPairList { ll.deleteCharAt(ww); boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(ll.toString()); boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(lWylie.toString()); - if (!isTibetan && !isSanskrit && !isNumeric && true) { + if (ddebug && !isTibetan && !isSanskrit && !isNumeric) { System.out.println("DLC: OTHER for " + lWylie + " with vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight()); } if (isTibetan && isSanskrit) isSanskrit = false; // RVA, e.g. - if (true && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) { + if (ddebug && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) { System.out.println("DLC: vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight()); } TGCPair tp; diff --git a/source/org/thdl/tib/text/ttt/TParseTree.java b/source/org/thdl/tib/text/ttt/TParseTree.java index ea428f2..ea83648 100644 --- a/source/org/thdl/tib/text/ttt/TParseTree.java +++ b/source/org/thdl/tib/text/ttt/TParseTree.java @@ -32,7 +32,7 @@ class TParseTree { /** Creates an empty list. */ public TParseTree() { } - /** Returns the ith pair in this list. */ + /** Returns the ith list of stack lists in this parse tree. */ public TStackListList get(int i) { return (TStackListList)al.get(i); } /** Adds p to the end of this list. */ @@ -98,8 +98,8 @@ class TParseTree { return sll; } - /** Returns a list containing the parses of this parse tree that - * are not clearly illegal. 
*/ + /** Returns a list (never null) containing the parses of this + * parse tree that are not clearly illegal. */ public TStackListList getNonIllegalParses() { TStackListList sll = new TStackListList(2); // save memory ParseIterator pi = getParseIterator(); @@ -124,12 +124,61 @@ class TParseTree { TStackListList up = getUniqueParse(); if (up.size() == 1) return up.get(0); - else if (up.size() == 2) { - } up = getNonIllegalParses(); int sz = up.size(); - if (up.size() == 1) { + if (sz == 1) { return up.get(0); + } else if (sz > 1) { + // {PADMA}, for example. Our technique is to go from the + // left and stack as much as we can. So {PA}{D}{MA} is + // inferior to {PA}{D+MA}, and {PA}{D+MA}{D}{MA} is + // inferior to {PA}{D+MA}{D+MA}. We do not look for the + // minimum number of glyphs, though -- {PA}{N+D}{B+H+R} + // and {PA}{N}{D+B+H+R} tie by that score, but the former + // is the clear winner. + + // We give a warning about these, optionally, so that + // users can produce output that even a dumb ACIP reader + // can understand. See getWarning(true, ..). + + // if j is in this list, then up.get(j) is still a + // potential winner. + ArrayList candidates = new ArrayList(sz); + for (int i = 0; i < sz; i++) + candidates.add(new Integer(i)); + boolean keepGoing = true; + int stackNumber = 0; + boolean someoneHasThisStack = true; + while (someoneHasThisStack && candidates.size() > 1) { + // maybe none of the candidates have stackNumber+1 + // stacks. If none do, we'll quit. + someoneHasThisStack = false; + int maxGlyphsInThisStack = 0; + for (int k = 0; k < candidates.size(); k++) { + TStackList sl = up.get(((Integer)candidates.get(k)).intValue()); + if (sl.size() > stackNumber) { + int ng; + if ((ng = sl.get(stackNumber).size()) > maxGlyphsInThisStack) + maxGlyphsInThisStack = ng; + someoneHasThisStack = true; + } + } + // Remove all candidates that aren't keeping up. 
+ if (someoneHasThisStack) { + for (int k = 0; k < candidates.size(); k++) { + TStackList sl = up.get(((Integer)candidates.get(k)).intValue()); + if (sl.size() > stackNumber) { + if (sl.get(stackNumber).size() != maxGlyphsInThisStack) + candidates.remove(k--); + } else throw new Error("impossible!"); + } + } + ++stackNumber; + } + if (candidates.size() == 1) + return up.get(((Integer)candidates.get(0)).intValue()); + else + return null; } return null; } @@ -161,13 +210,11 @@ class TParseTree { return legalParsesWithVowelOnRoot; else { if (legalParsesWithVowelOnRoot.size() == 2) { - // DLC is this even valid? if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size()) throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1)); return new TStackListList(legalParsesWithVowelOnRoot.get(1)); } if (allLegalParses.size() == 2) { - // DLC is this even valid? if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size()) throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1)); return new TStackListList(allLegalParses.get(1)); @@ -194,7 +241,65 @@ class TParseTree { return false; } + /** Returns null if this parse tree is perfectly legal and valid. + * Returns a warning for users otherwise. If and only if + * paranoid is true, then even unambiguous ACIP like PADMA, which + * could be improved by being written as PAD+MA, will cause a + * warning. + * @param paranoid true if you do not mind a lot of warnings + * @param pl the pair list from which this parse tree originated + * @param originalACIP the original ACIP, or null if you want + * this parse tree to make a best guess. 
*/ + public String getWarning(boolean paranoid, + TPairList pl, + String originalACIP) { + TStackListList up = getUniqueParse(); + if (null == up || up.size() != 1) { + boolean isLastStack[] = new boolean[1]; + TStackListList nip = getNonIllegalParses(); + if (nip.size() != 1) { + if (null == getBestParse()) { + return "There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}"; + } else { + if (getBestParse().hasStackWithoutVowel(pl, isLastStack)) { + if (isLastStack[0]) { + return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}"; + } else { + return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}"; + } + } + if (paranoid) { + return "Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful."; + } + } + } else { + if (nip.get(0).hasStackWithoutVowel(pl, isLastStack)) { + if (isLastStack[0]) { + return "Warning: The last stack does not have a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}"; + } else { + return "Warning: There is a stack, before the last stack, without a vowel in the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}"; + } + } + } + } + return null; + } + + /** Returns something akin to the ACIP input (okay, maybe 1-2-3-4 + * instead of 1234, and maybe AUTPA instead of AUT-PA) + * corresponding to this parse tree. */ + public String recoverACIP() { + ParseIterator pi = getParseIterator(); + if (pi.hasNext()) { + return pi.next().recoverACIP(); + } + return null; + } + /** Returns a hashCode appropriate for use with our {@link * #equals(Object)} method. 
*/ public int hashCode() { return al.hashCode(); } + + /** Returns true if and only if this parse tree is empty. */ + public boolean isEmpty() { return al.isEmpty(); } } diff --git a/source/org/thdl/tib/text/ttt/TStackList.java b/source/org/thdl/tib/text/ttt/TStackList.java index 56d08e7..7111ba8 100644 --- a/source/org/thdl/tib/text/ttt/TStackList.java +++ b/source/org/thdl/tib/text/ttt/TStackList.java @@ -69,15 +69,25 @@ class TStackList { /** Returns true if and only if this list is empty. */ public boolean isEmpty() { return al.isEmpty(); } + /** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234) + * corresponding to this stack list. */ + public String recoverACIP() { + return toStringHelper(false); + } + /** Returns a human-readable representation like {G}{YA} or * {GYA}. */ public String toString() { + return toStringHelper(true); + } + + private String toStringHelper(boolean brackets) { int sz = size(); StringBuffer b = new StringBuffer(); for (int i = 0; i < sz; i++) { - b.append('{'); + if (brackets) b.append('{'); b.append(get(i).recoverACIP()); - b.append('}'); + if (brackets) b.append('}'); } return b.toString(); } @@ -152,18 +162,49 @@ class TStackList { return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot); } + private static final boolean ddebug = false; + /** Returns true if and only if this stack list contains a clearly * illegal construct, such as an TPair (V . something). */ boolean isClearlyIllegal() { // check for {D}{VA} sorts of things: for (int i = 0; i < size(); i++) { if (get(i).getACIPError() != null) { - System.out.println("DLC: error is " + get(i).getACIPError()); + if (ddebug) System.out.println("ddebug: error is " + get(i).getACIPError()); return true; } } return false; } + + /** Returns true if and only if this stack list contains a stack + * that does not end in a vowel or disambiguator. 
Note that this + * is not erroneous for legal Tibetan like {BRTAN}, where {B} has + * no vowel, but it is a warning sign for Sanskrit stacks. + * @param opl the pair list from which this stack list + * originated + * @param isLastStack if non-null, then isLastStack[0] will be + * set to true if and only if the very last stack is the only + * stack not to have a vowel or disambiguator on it */ + boolean hasStackWithoutVowel(TPairList opl, boolean[] isLastStack) { + int runningSize = 0; + for (int i = 0; i < size(); i++) { + TPairList pl = get(i); + String l; + TPair lastPair = opl.getNthNonDisambiguatorPair(runningSize + pl.size() - 1); + runningSize += pl.size(); + if (null == lastPair.getRight() + && !((l = lastPair.getLeft()) != null && l.length() == 1 + && l.charAt(0) >= '0' && l.charAt(0) <= '9')) { + if (null != isLastStack) + isLastStack[0] = (i + 1 == size()); + return true; + } + } + if (runningSize != opl.sizeMinusDisambiguators()) + throw new IllegalArgumentException("opl (" + opl + ") is bad for this stack list (" + toString() + ")"); + return false; + } } class BoolPair { diff --git a/source/org/thdl/tib/text/ttt/TStackListList.java b/source/org/thdl/tib/text/ttt/TStackListList.java index f826598..304831b 100644 --- a/source/org/thdl/tib/text/ttt/TStackListList.java +++ b/source/org/thdl/tib/text/ttt/TStackListList.java @@ -83,4 +83,11 @@ class TStackListList { * iterating and you'll have to read the code to know what will * happen. */ public ListIterator listIterator() { return al.listIterator(); } + + /** Returns the ACIP input (okay, maybe 1-2-3-4 instead of 1234) + * corresponding to this stack list list. */ + public String recoverACIP() { + if (isEmpty()) return null; + return get(0).recoverACIP(); + } }