diff --git a/source/org/thdl/tib/text/TibetanDocument.java b/source/org/thdl/tib/text/TibetanDocument.java index b29ffdc..7eee74e 100644 --- a/source/org/thdl/tib/text/TibetanDocument.java +++ b/source/org/thdl/tib/text/TibetanDocument.java @@ -232,16 +232,28 @@ public class TibetanDocument extends DefaultStyledDocument { * @param color the color in which to insert, which is used if and only * if {@link #colorsEnabled() colors are enabled} */ - public void appendDuffCodes(DuffCode[] glyphs, Color color) { + public void appendDuffCodes(DuffCode[] glyphs, Color color) { // PERFORMANCE FIXME: this isn't so speedy, but it reuses // existing code. for (int i = 0; i < glyphs.length; i++) { - insertDuff(getLength(), - new DuffData[] { new DuffData(new String(new char[] { glyphs[i].getCharacter() }), - glyphs[i].getFontNum()) }, - color); + appendDuffCode(glyphs[i], color); } - } + } + +/** +* Appends glyph to the end of this document. +* @param glyph the Tibetan glyph you want to insert +* @param color the color in which to insert, which is used if and only +* if {@link #colorsEnabled() colors are enabled} +*/ + public void appendDuffCode(DuffCode glyph, Color color) { + // PERFORMANCE FIXME: this isn't so speedy, but it reuses + // existing code. + insertDuff(getLength(), + new DuffData[] { new DuffData(new String(new char[] { glyph.getCharacter() }), + glyph.getFontNum()) }, + color); + } /** Replacing can be more efficient than inserting and then diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java index 9262bb3..9cd602b 100644 --- a/source/org/thdl/tib/text/ttt/ACIPConverter.java +++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java @@ -1,3 +1,6 @@ +// DLC NOW: 'US etc. -- do we handle them all? +// DLC NOW WARN ON NNYA and DBA +// DLC NOW: implement Robert Chilton-supplied prefix rules /* The contents of this file are subject to the THDL Open Community License Version 1.0 (the "License"); you may not use this file except in compliance @@ -348,13 +351,14 @@ public class ACIPConverter { if (null != tdoc) tdoc.appendRoman(text, Color.BLACK); } else { String unicode = null; - DuffCode[] duff = null; + Object[] duff = null; if (stype == TString.TIBETAN_NON_PUNCTUATION) { lastGuyWasNonPunct = true; - TPairList pl = TPairListFactory.breakACIPIntoChunks(s.getText()); + TPairList pls[] = TPairListFactory.breakACIPIntoChunks(s.getText()); String acipError; - if ((acipError = pl.getACIPError()) != null) { + if ((acipError = pls[0].getACIPError()) != null + && (null == pls[1] || pls[1].getACIPError() != null)) { hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS THESE ERRORS: " + acipError + "]"; if (null != writer) writer.write(errorMessage); @@ -362,8 +366,10 @@ public class ACIPConverter { if (null != errors) errors.append(errorMessage + "\n"); } else { - TParseTree pt = pl.getParseTree(); - if (null == pt) { + TParseTree pt0 = pls[0].getParseTree(); + TParseTree pt1 = ((null == pls[1]) + ? null : pls[1].getParseTree()); + if (null == pt0 && null == pt1) { hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " IS ESSENTIALLY NOTHING.]"; if (null != writer) writer.write(errorMessage); @@ -371,8 +377,10 @@ public class ACIPConverter { if (null != errors) errors.append(errorMessage + "\n"); } else { - TStackList sl = pt.getBestParse(); - if (null == sl) { + TStackList sl0 = pt0.getBestParse(); + TStackList sl1 = ((null == pt1) + ? null : pt1.getBestParse()); + if (null == sl0 && null == sl1) { hasErrors = true; String errorMessage = "[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") " + s.getText() + " HAS NO LEGAL PARSES.]"; if (null != writer) writer.write(errorMessage); @@ -380,6 +388,25 @@ public class ACIPConverter { if (null != errors) errors.append(errorMessage + "\n"); } else { + TStackList sl = sl0; + TPairList pl = pls[0]; + TParseTree pt = pt0; + // set sl equal to the best choice of sl0 and sl1. + if (null != sl1) { + BoolTriple sl0bt = sl0.isLegalTshegBar(false); + BoolTriple sl1bt = sl1.isLegalTshegBar(false); + int ct; + if ((ct = sl0bt.compareTo(sl1bt)) < 0) { + sl = sl1; + pl = pls[1]; + pt = pt1; + } else if (0 == ct) { + // sl remains sl0 -- '* is + // a vowel unless it's + // clearly part of an + // appendage like 'AM. + } + } lastGuy = sl; String warning = null; if ("None" != warningLevel) { @@ -428,10 +455,10 @@ public class ACIPConverter { color = Color.BLACK; if (stype == TString.START_SLASH) { if (null != writer) unicode = "\u0F3C"; - if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph("(") }; + if (null != tdoc) duff = new Object[] { TibetanMachineWeb.getGlyph("(") }; } else if (stype == TString.END_SLASH) { if (null != writer) unicode = "\u0F3D"; - if (null != tdoc) duff = new DuffCode[] { TibetanMachineWeb.getGlyph(")") }; + if (null != tdoc) duff = new Object[] { TibetanMachineWeb.getGlyph(")") }; } else if (stype == TString.TIBETAN_PUNCTUATION) { // For ACIP, tshegs are used as both // tshegs and whitespace. We treat a @@ -499,7 +526,7 @@ public class ACIPConverter { } else { String wy = ACIPRules.getWylieForACIPOther(s.getText()); if (null == wy) throw new Error("No wylie for ACIP " + s.getText()); - duff = new DuffCode[] { TibetanMachineWeb.getGlyph(wy) }; + duff = new Object[] { TibetanMachineWeb.getGlyph(wy) }; } } } @@ -526,7 +553,18 @@ public class ACIPConverter { if (null != writer && null != unicode) writer.write(unicode); if (null != tdoc) { if (null != duff && 0 != duff.length) { - tdoc.appendDuffCodes(duff, color); + for (int j = 0; j < duff.length; j++) { + if (duff[j] instanceof DuffCode) + tdoc.appendDuffCode((DuffCode)duff[j], + color); + else { + hasErrors = true; + if (null != errors) + errors.append((String)duff[j] + "\n"); + tdoc.appendRoman((String)duff[j], + Color.RED); + } + } } else { // this happens when you have an // [#ERROR]-producing tsheg bar. diff --git a/source/org/thdl/tib/text/ttt/ACIPRules.java b/source/org/thdl/tib/text/ttt/ACIPRules.java index 5508262..ad535f7 100644 --- a/source/org/thdl/tib/text/ttt/ACIPRules.java +++ b/source/org/thdl/tib/text/ttt/ACIPRules.java @@ -179,6 +179,11 @@ public class ACIPRules { wylieToACIP.put(EWTS, ACIP); } + /** Returns true if and only if s is an ACIP consonant. */ + static final boolean isACIPConsonant(String s) { + return (null != ACIPRules.getWylieForACIPConsonant(s)); + } + private static HashMap acipConsonant2wylie = null; /** Returns the EWTS corresponding to the given ACIP consonant * (without the "A" vowel). Returns null if there is no such diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java index ba71f11..c99c164 100644 --- a/source/org/thdl/tib/text/ttt/PackageTest.java +++ b/source/org/thdl/tib/text/ttt/PackageTest.java @@ -52,28 +52,31 @@ public class PackageTest extends TestCase { public PackageTest() { } private static void tstHelper(String acip) { - tstHelper2(acip, null, false, null, null, null); + tstHelper2(acip, null, false, null, null, null, 0); } private static void tstHelper(String acip, String expectedPairs) { - tstHelper2(acip, expectedPairs, false, null, null, null); + tstHelper2(acip, expectedPairs, false, null, null, null, 0); } private static void tstHelper(String acip, String[] expectedParses) { - tstHelper2(acip, null, false, expectedParses, null, null); + tstHelper2(acip, null, false, expectedParses, null, null, 0); } private static void tstHelper(String acip, String expectedPairs, String[] expectedParses) { - tstHelper2(acip, expectedPairs, false, expectedParses, null, null); + tstHelper2(acip, expectedPairs, false, expectedParses, null, null, 0); } private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses) { - tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, null); + tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, null, 0); } private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses, String expectedBestParse) { - tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse); + tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse, 0); + } + private static void tstHelper(String acip, String expectedPairs, String[] expectedParses, String[] legalParses, String expectedBestParse, int which) { + tstHelper2(acip, expectedPairs, false, expectedParses, legalParses, expectedBestParse, which); } private static void tstHelper2(String acip) { tstHelper2(acip, null); } private static void tstHelper2(String acip, String expectedPairs) { - tstHelper2(acip, expectedPairs, true, null, null, null); + tstHelper2(acip, expectedPairs, true, null, null, null, 0); } private static final boolean sdebug = false; @@ -82,8 +85,10 @@ public class PackageTest extends TestCase { boolean debug, String[] expectedParses, String[] expectedLegalParses, - String expectedBestParse) { - TPairList l = TPairListFactory.breakACIPIntoChunks(acip); + String expectedBestParse, + int pairListToUse) { + TPairList[] la = TPairListFactory.breakACIPIntoChunks(acip); + TPairList l = la[pairListToUse]; if (sdebug || debug) System.out.println("ACIP=" + acip + " and l'=" + l); if (expectedPairs != null) { @@ -93,6 +98,10 @@ public class PackageTest extends TestCase { } } + if (null == l) { + assertTrue("!null!".equals(expectedBestParse)); + return; + } TParseTree pt = l.getParseTree(); if (pt == null) { if (sdebug || debug) @@ -110,7 +119,7 @@ public class PackageTest extends TestCase { int np = pt.numberOfParses(); boolean goodness = expectedParses == null || expectedParses.length == np; if (sdebug || debug || !goodness) - System.out.println("ACIP=" + acip + " and parse tree=" + pt + " /size " + pt.size() + "; /pairs " + pt.numberOfPairs() + "; /numParses " + np); + System.out.println("ACIP=" + acip + " and expectedParses is " + expectedParses + " with length " + ((null == expectedParses)?0:expectedParses.length) + " and parse tree=" + pt + " /size " + pt.size() + "; /pairs " + pt.numberOfPairs() + "; /numParses " + np); assertTrue(goodness); { @@ -204,7 +213,9 @@ public class PackageTest extends TestCase { && (acip.indexOf('6') < 0) && (acip.indexOf('7') < 0) && (acip.indexOf('8') < 0) - && (acip.indexOf('9') < 0)) { + && (acip.indexOf('9') < 0) + && pairListToUse == 1 + && (acip.indexOf('\'') < 0)) { System.out.println("acip=" + acip + "; recovery is " + l.recoverACIP()); assertTrue(false); @@ -214,18 +225,25 @@ public class PackageTest extends TestCase { // DLC FIXME: warn if we have to use the "what stacks take a GA prefix?" rules to get a unique legal parse. public void testCutoff() { - // this would be exponential running time, so we cut it off: + // this would once be exponential running time, so we'd cut it off: tstHelper("BRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTNBRTN"); } public void testSlowestTshegBar() { - // this would be exponential running time, so we cut it off: + // this would once be exponential running time, so we'd cut it off: tstHelper("BRTNBRTNBRTNB"); } public void testPerformance() { tstHelper("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX"); - tstHelper("9012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"); + boolean x = false; + try { + tstHelper("901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"); + } catch (IllegalArgumentException e) { + x = true; + } + assertTrue(x); + tstHelper("9012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678"); } /** Tests {@link TPairListFactory#breakACIPIntoChunks(String)}, @@ -332,20 +350,71 @@ tstHelper("KA'", "[(K . A), (' . )]", // If you're not careful, you'll think GGYES is a legal // Tibetan tsheg bar and parse it as {G}{G+YE}{S}. But it's - // Sanskrit, really, because GA doesn't take a GA prefix. + // non-native, really, because GA doesn't take a GA prefix. // This doesn't occur in ACIP input files that I've seen, but // GGYI (S1000I.INC) and GGYUR (S5275MC4.ACT) do occur. tstHelper("GGYES", "{G}{G}{YE}{S}", - new String[] { "{G}{G}{YE}{S}", "{G}{G+YE}{S}", "{G+G}{YE}{S}" }, + new String[] { "{G+G+YE}{S}", "{G}{G+YE}{S}" }, new String[] { }, - "{G+G}{YE}{S}"); + "{G+G+YE}{S}"); // DLC FIXME: warn about BDE vs. B+DE. color such differently. Maybe an inputter saw B+DE and typed in BDE, not thinking. tstHelper("BDE", "{B}{DE}", - new String[] { "{B}{DE}", "{B+DE}" }, + new String[] { "{B+DE}", "{B}{DE}" }, new String[] { "{B}{DE}" }, "{B}{DE}"); + tstHelper("GDAMS'O", "{G}{DA}{M}{S'O}", + new String[] { + "{G+DA}{M+S'O}", + "{G}{DA}{M+S'O}", + }, + new String[] { }, + "{G+DA}{M+S'O}", 0); + + tstHelper("GDAMS'O", "{G}{DA}{M}{S-}{'O}", + new String[] { + "{G+DA}{M+S}{'O}", + "{G+DA}{M}{S}{'O}", + "{G}{DA}{M+S}{'O}", + "{G}{DA}{M}{S}{'O}", + }, + new String[] { "{G}{DA}{M}{S}{'O}" }, + "{G}{DA}{M}{S}{'O}", 1); + + tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S-}{'A}{M-}{'A}{NG}", null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}", 1); + tstHelper("SNYAMS'AM'ANG", "{S}{NYA}{M}{S'A}{M'A}{NG}", null, null, "{S+NYA}{M+S'A}{M'A}{NG}", 0); + tstHelper("SNYAM'AM", null, null, null, "{S+NYA}{M}{'A}{M}", 1); + tstHelper("SNYAMS'AM", null, null, null, "{S+NYA}{M}{S}{'A}{M}", 1); + tstHelper("SNYAM-'A-M", null, null, null, "!null!", 1); + tstHelper("SNYAM-'A-M", null, null, null, "{S+NYA}{M}{'A}{M}", 0); + tstHelper("SNY-M-'-M", null, null, null, "{S+NY}{M}{'}{M}", 0); + tstHelper("SNY-M-'-M", null, null, null, "!null!", 1); + tstHelper("SNYAMS'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 1); + tstHelper("SNYAMS'I'AM'ANG'U'I'O", null, null, null, "{S+NYA}{M}{S}{'I}{'A}{M}{'A}{NG}{'U}{'I}{'O}", 1); + tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", 0); + tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", 0); + tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", 0); + tstHelper("SAM'UR'US", null, null, null, "{SA}{M}{'U}{R}{'U}{S}", 1); + tstHelper("SAM'US", null, null, null, "{SA}{M}{'U}{S}", 1); + tstHelper("SAM'AM", null, null, null, "{SA}{M}{'A}{M}", 1); + tstHelper("SAMS'ANG", null, null, null, "{SA}{M}{S}{'A}{NG}", 1); + tstHelper("SNYANGD'O", null, null, null, "{S+NYA}{NG}{D}{'O}", 1); + tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D}{'O}", 1); // T is no prefix, so NG+D, not NG-D + tstHelper("T-SNYANGD'O", null, null, null, "{T}{S+NYA}{NG+D'O}", 0); + + tstHelper("SNYAM+S+'O", null, null, null, "{S+NYA}{M+S+'O}", 0); + tstHelper("SNYAMS+'O", null, null, null, "{S+NYA}{M+S+'O}", 0); + + tstHelper("GDAMS", null, null, null, "{G}{DA}{M}{S}", 0); + tstHelper("GDAM-S'O", null, null, null, "{G}{DA}{M}{S}{'O}", 1); + tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C'O}", 0); + tstHelper("GDAM-C'O", null, null, null, "{G+DA}{M}{C}{'O}", 1); + tstHelper("GDAMS", null, null, null, "{G}{DA}{M}{S}", 0); + // DLC NOW: FIXME: tstHelper("DKHY", null, null, null, "{D}{KH+YA}", 0); + // DLC DKHY'O should give parse tree {{D-KH+Y'O}, {D+KH+Y'O}} + // DLC DKHYA'O should give parse tree {{D-KH+YA'O}, {D+KH+YA'O}} + tstHelper("SHR'I", "{SH}{R'I}", null, null, @@ -357,7 +426,7 @@ tstHelper("KA'", "[(K . A), (' . )]", // DLC FIXME: do TMW->ACIP->TMW->ACIP round-trip. tstHelper("DRUG", "{D}{RU}{G}", - new String[] { "{D}{RU}{G}", "{D+RU}{G}" }, + new String[] { "{D+RU}{G}", "{D}{RU}{G}" }, new String[] { "{D+RU}{G}" }, "{D+RU}{G}"); @@ -369,22 +438,22 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("Gd+H+d+HA"); tstHelper("AUTPA", "{AU}{T}{PA}", - new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" }, + new String[] { "{AU}{T+PA}" }, new String[] { }, "{AU}{T+PA}"); tstHelper("PADMA", "{PA}{D}{MA}", null, null); tstHelper("PADMA", "{PA}{D}{MA}", - new String[] { "{PA}{D}{MA}", "{PA}{D+MA}" }, + new String[] { "{PA}{D+MA}" }, new String[] { }, "{PA}{D+MA}"); tstHelper("PADMDM", "{PA}{D}{M}{D}{M}", null, new String[] { }, - "{PA}{D+M}{D+M}"); + "{PA}{D+M+D+M}"); tstHelper("GRVA'I", "{G}{R}{VA}{'I}", - new String[] { "{G}{R+VA}{'I}", "{G+R+VA}{'I}" }, + new String[] { "{G+R+VA}{'I}", "{G}{R+VA}{'I}" }, new String[] { "{G+R+VA}{'I}" }); tstHelper("G-RVA'I", "{G-}{R}{VA}{'I}", new String[] { "{G}{R+VA}{'I}" }, @@ -393,22 +462,24 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("RVA", "{R}{VA}", new String[] { "{R+VA}" }, new String[] { "{R+VA}" }); + tstHelper("VA", "{VA}", + new String[] { "{VA}" }, + new String[] { }, + ""); + tstHelper("K+O", "{K+}{O}", new String[] { }, new String[] { }); tstHelper("K+0", "{K+}{0}", new String[] { }, new String[] { }); tstHelper("0+K", "{0-}{+-}{K}", new String[] { }, new String[] { }); tstHelper("0+0", "{0-}{+-}{0}", new String[] { }, new String[] { }); - // DLC add tests for BRTAN, BLTA, BLAG, BRAG, B-LAG, B-RAG - - // MARK for searching tstHelper("0", "{0}", new String[] { "{0}" }, new String[] { "{0}" }); tstHelper("0123", "{0-}{1-}{2-}{3}", new String[] { "{0}{1}{2}{3}" }, new String[] { "{0}{1}{2}{3}" }); tstHelper("0-123", "{0-}{-}{1-}{2-}{3}", new String[] { "{0}{1}{2}{3}" }, new String[] { "{0}{1}{2}{3}" }); - tstHelper("0123KA", "{0-}{1-}{2-}{3-}{KA}", new String[] { "{0}{1}{2}{3}{KA}" }, + tstHelper("0123KA", "{0-}{1-}{2-}{3-}{KA}", new String[] { }, new String[] { }); - tstHelper("G0123KA", "{G}{0-}{1-}{2-}{3-}{KA}", new String[] { "{G}{0}{1}{2}{3}{KA}" }, + tstHelper("G0123KA", "{G}{0-}{1-}{2-}{3-}{KA}", new String[] { }, new String[] { }); tstHelper("BHA"); tstHelper("BHE"); @@ -418,10 +489,10 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("D-VA"); tstHelper("DVA"); tstHelper("SRAS", "{S}{RA}{S}", - new String[] { "{S}{RA}{S}", "{S+RA}{S}" }, + new String[] { "{S+RA}{S}" }, new String[] { "{S+RA}{S}" }); tstHelper("SARS", "{SA}{R}{S}", - new String[] { "{SA}{R}{S}", "{SA}{R+S}" }, + new String[] { "{SA}{R+S}", "{SA}{R}{S}" }, new String[] { "{SA}{R}{S}" }); tstHelper("SARAS", "{SA}{RA}{S}", new String[] { "{SA}{RA}{S}" }, @@ -429,9 +500,9 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("SHLO", "{SH}{LO}", - new String[] { "{SH}{LO}", "{SH+LO}" }, + new String[] { "{SH+LO}" }, new String[] { "{SH+LO}" }); - tstHelper("ZLUM", "{Z}{LU}{M}", new String[] { "{Z}{LU}{M}", "{Z+LU}{M}" }, new String[] { "{Z+LU}{M}" }); + tstHelper("ZLUM", "{Z}{LU}{M}", new String[] { "{Z+LU}{M}" }, new String[] { "{Z+LU}{M}" }); tstHelper("K'EE", "{K'EE}"); tstHelper("K'O", "{K'O}"); tstHelper("K'OO", "{K'OO}"); @@ -439,8 +510,7 @@ tstHelper("KA'", "[(K . A), (' . )]", tstHelper("K'i", "{K'i}"); tstHelper("K'A", "{K'A}"); tstHelper("B+DDZ", "{B+}{D}{DZ}", - new String[] { "{B+D}{DZ}", - "{B+D+DZ}" }); // we're conservative. + new String[] { "{B+D+DZ}" }); // we're conservative. // A heuristic is to // say that B+DDZ must // be {B+D}{DZ} @@ -452,62 +522,44 @@ tstHelper("KA'", "[(K . A), (' . )]", // that we know the // keyboardist was // aware of the plus - // operator. + // operator. DLC FIXME: warn in this case! tstHelper("BRTN--GA", "{B}{R}{T}{N-}{-}{GA}", new String[] { - "{B}{R}{T}{N}{GA}", - "{B}{R}{T+N}{GA}", - "{B}{R+T}{N}{GA}", - "{B+R}{T}{N}{GA}", - "{B+R}{T+N}{GA}", - }); - tstHelper("BR-TN"); // DLC: no legal parses, and 2 decent ones, eh? + "{B+R+T+N}{GA}", + "{B}{R+T+N}{GA}" + }, + new String[] {}, + "{B+R+T+N}{GA}"); + tstHelper("BR-TN"); tstHelper("BRTN", "{B}{R}{T}{N}", new String[] { - "{B}{R}{T}{N}", - "{B}{R}{T+N}", - "{B}{R+T}{N}", - "{B+R}{T}{N}", - "{B+R}{T+N}", + "{B+R+T+N}", + "{B}{R+T+N}" }, - new String[] { - "{B}{R+T}{N}" // prefix-root-suffix - }); + new String[] { }, + "{B+R+T+N}"); + tstHelper("BRT-N", + "{B}{R}{T-}{N}", + null, + null, + "{B}{R+T}{N}"); + tstHelper("BRTAN", + "{B}{R}{TA}{N}", + null, + null, + "{B}{R+TA}{N}"); tstHelper("BRTN-BRTN", "{B}{R}{T}{N-}{B}{R}{T}{N}", new String[] { - "{B}{R}{T}{N}{B}{R}{T}{N}", - "{B}{R}{T}{N}{B}{R}{T+N}", - "{B}{R}{T}{N}{B}{R+T}{N}", - "{B}{R}{T}{N}{B+R}{T}{N}", - "{B}{R}{T}{N}{B+R}{T+N}", - "{B}{R}{T+N}{B}{R}{T}{N}", - "{B}{R}{T+N}{B}{R}{T+N}", - "{B}{R}{T+N}{B}{R+T}{N}", - "{B}{R}{T+N}{B+R}{T}{N}", - "{B}{R}{T+N}{B+R}{T+N}", - "{B}{R+T}{N}{B}{R}{T}{N}", - "{B}{R+T}{N}{B}{R}{T+N}", - "{B}{R+T}{N}{B}{R+T}{N}", - "{B}{R+T}{N}{B+R}{T}{N}", - "{B}{R+T}{N}{B+R}{T+N}", - "{B+R}{T}{N}{B}{R}{T}{N}", - "{B+R}{T}{N}{B}{R}{T+N}", - "{B+R}{T}{N}{B}{R+T}{N}", - "{B+R}{T}{N}{B+R}{T}{N}", - "{B+R}{T}{N}{B+R}{T+N}", - "{B+R}{T+N}{B}{R}{T}{N}", - "{B+R}{T+N}{B}{R}{T+N}", - "{B+R}{T+N}{B}{R+T}{N}", - "{B+R}{T+N}{B+R}{T}{N}", - "{B+R}{T+N}{B+R}{T+N}", + "{B+R+T+N}{B+R+T+N}", + "{B}{R+T+N}{B+R+T+N}" }); // has 25 parses tstHelper("B+R-T-N-B-R-T+N", new String[] { "{B+R}{T}{N}{B}{R}{T+N}" }); // has 1 parse - tstHelper("B+G+K", "{B+}{G+}{K}", new String[] { }); // no parses. + tstHelper("B+G+K", "{B+}{G+}{K}", new String[] { "{B+G+K}" }, null, "{B+G+K}"); // no parses. tstHelper("G-YA", new String[] { "{G}{YA}" }); // has 1 parse tstHelper("G+YA", @@ -516,51 +568,31 @@ tstHelper("KA'", "[(K . A), (' . )]", new String[] { "{G+YAm:}" }); // has 1 parse tstHelper("BRTN-BLTA", "{B}{R}{T}{N-}{B}{L}{TA}", new String[] { - "{B}{R}{T}{N}{B}{L}{TA}", - "{B}{R}{T}{N}{B}{L+TA}", - "{B}{R}{T}{N}{B+L}{TA}", - "{B}{R}{T+N}{B}{L}{TA}", - "{B}{R}{T+N}{B}{L+TA}", - "{B}{R}{T+N}{B+L}{TA}", - "{B}{R+T}{N}{B}{L}{TA}", - "{B}{R+T}{N}{B}{L+TA}", - "{B}{R+T}{N}{B+L}{TA}", - "{B+R}{T}{N}{B}{L}{TA}", - "{B+R}{T}{N}{B}{L+TA}", - "{B+R}{T}{N}{B+L}{TA}", - "{B+R}{T+N}{B}{L}{TA}", - "{B+R}{T+N}{B}{L+TA}", - "{B+R}{T+N}{B+L}{TA}", + "{B+R+T+N}{B+L+TA}", + "{B}{R+T+N}{B+L+TA}" }); // has 15 parses - tstHelper("BSABS", new String[] { "{B}{SA}{B}{S}" }); + tstHelper("BSABS", + new String[] { "{B+SA}{B+S}", "{B+SA}{B}{S}", "{B}{SA}{B+S}", "{B}{SA}{B}{S}" }); tstHelper("ZUNGS"); tstHelper("BRTIB", "{B}{R}{TI}{B}", new String[] { - "{B}{R}{TI}{B}", - "{B}{R+TI}{B}", - "{B+R}{TI}{B}", + "{B+R+TI}{B}", + "{B}{R+TI}{B}" }); tstHelper("PRiTZTSVA", "{P}{Ri}{TZ}{TS}{VA}", new String[] { - "{P}{Ri}{TZ}{TS+VA}", - "{P}{Ri}{TZ+TS+VA}", - "{P+Ri}{TZ}{TS+VA}", "{P+Ri}{TZ+TS+VA}" }); tstHelper("SPYOMS", "{S}{P}{YO}{M}{S}", new String[] { - "{S}{P}{YO}{M}{S}", - "{S}{P}{YO}{M+S}", - "{S}{P+YO}{M}{S}", - "{S}{P+YO}{M+S}", - "{S+P}{YO}{M}{S}", - "{S+P}{YO}{M+S}", - "{S+P+YO}{M}{S}", "{S+P+YO}{M+S}", + "{S+P+YO}{M}{S}", }); tstHelper(":'AO", "[(: . -), (' . ), (A . O)]"); + tstHelper("m'AO", "[(m . -), (' . ), (A . O)]"); + tstHelper("m:'AO", "[(m . -), (: . -), (' . ), (A . O)]"); tstHelper("AA:", "[(A . A:)]", new String[] { "{AA:}" }); tstHelper("KE:", "[(K . E:)]"); tstHelper("K:", "[(K . ), (: . )]", @@ -2174,6 +2206,7 @@ tstHelper("DBANG"); tstHelper("DBAR"); tstHelper("DBAS"); tstHelper("DBE"); +// DLC NOW: TMW->ACIP doesn't do {KHA (KA)}. tstHelper("DBEN"); tstHelper("DBER"); tstHelper("DBES"); @@ -7135,7 +7168,7 @@ tstHelper("ZUR"); + "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"); shelp("[ILLEGAL COMMENT]", "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); - shelp("(BSKYABS GRO)", ""); // DLC WHAT ARE THESE FOR? + shelp("(BSKYABS GRO)", ""); shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n"); shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n"); shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n"); @@ -7171,7 +7204,7 @@ tstHelper("ZUR"); shelp("[* Correction with []]", "Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n"); - // DLC FIXME: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. Autocorrect to the latter. + // DLC DOC: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. We autocorrect to the latter. // DLC FIXME: @0B1 isn't handled correctly! @@ -7268,7 +7301,7 @@ tstHelper("ZUR"); System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)); assertTrue(false); } - System.out.println("DLC: Unicode for " + acip + " can't be had; errors are " + errors); + System.out.println("Unicode for " + acip + " can't be had; errors are " + errors); } else { if (null != expectedUnicode && !expectedUnicode.equals(unicode)) { System.out.println("The unicode for " + acip + " is " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(unicode) + ", but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode)); @@ -7278,6 +7311,18 @@ tstHelper("ZUR"); } public void testACIPConversion() { + uhelp("DZHDZHA", "\u0f5c\u0fac"); // tricky because DZHDZA is not in TMW but DZHDZHA is + uhelp("DZHDZA", "\u0f5c\u0fab"); + uhelp("P+S+N+YA", "\u0f54\u0fb6\u0fa3\u0fb1"); + uhelp("PSNYA", "\u0f54\u0fb6\u0f99"); // Is this P+S+N+YA? No, it's P+S+NYA. But, DLC, warn! + uhelp("NNYA", "\u0f53\u0f99"); // DLC warn + uhelp("GHNYA", "\u0f43\u0f99"); + + // TS+NYA and T+S+N+YA are both legal, so what is TSNYA? + // Private correspondence with Robert Chilton says that it is + // TS+NYA, but he warns that such are suspect. + + uhelp("THAG PA", "\u0f50\u0f42\u0f0b\u0f54"); uhelp("KA \nKHA\n\nGA", "\u0f40\u0f0b\u0f41\u0f0b\n\n\u0f42"); uhelp("KA%\nKHA", "\u0f40\u0f35\u0f0b\u0f41"); uhelp("KA%", "\u0f40\u0f35"); @@ -7317,7 +7362,7 @@ tstHelper("ZUR"); uhelp("*#HUm: G+DHOO GRO`;.,", "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f42\u0fa2\u0f7d\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); uhelp("*#HUm: K+DHA GRO`;.,", - "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b[#ERROR CONVERTING ACIP DOCUMENT: THE TSHEG BAR (\"SYLLABLE\") K+DHA IS ESSENTIALLY NOTHING.]\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); + "\u0f04\u0f05\u0f04\u0f05\u0f05\u0f67\u0f74\u0f7e\u0f7f\u0f0b\u0f40\u0fa2\u0f0b\u0f42\u0fb2\u0f7c\u0f08\u0f11\u0f0c\u0f0d"); // DLC FIXME: the file ACIP_SHRI should be made into an ACIP->TMW automated test case } @@ -7507,7 +7552,6 @@ tstHelper("ZUR"); tstHelper("AUT"); tstHelper("AUTPALA'I"); tstHelper("B'I"); - tstHelper("B. (DLC!)"); tstHelper("BA"); tstHelper("BA'"); tstHelper("BA'A"); @@ -7919,7 +7963,6 @@ tstHelper("ZUR"); tstHelper("GDONGS"); tstHelper("GDUD"); tstHelper("GDUG"); - tstHelper("GDUG.PA (DLC!)"); tstHelper("GDUGS"); tstHelper("GDUN"); tstHelper("GE"); @@ -8474,7 +8517,6 @@ tstHelper("ZUR"); tstHelper("RNGUS"); tstHelper("RNOGS"); tstHelper("RO"); - tstHelper("RO.STOD (DLC!)"); tstHelper("ROL"); tstHelper("RSBOD"); tstHelper("RTAG"); diff --git a/source/org/thdl/tib/text/ttt/TPair.java b/source/org/thdl/tib/text/ttt/TPair.java index f442007..a3f8e7f 100644 --- a/source/org/thdl/tib/text/ttt/TPair.java +++ b/source/org/thdl/tib/text/ttt/TPair.java @@ -118,7 +118,7 @@ class TPair { return (null != l && ((null == r || "".equals(r)) || "-".equals(r) - || "A".equals(r)) // DLC though check for BASKYABS and warn because BSKYABS is more common + || "A".equals(r)) // DLC FIXME: though check for BASKYABS and warn because BSKYABS is more common && ("'".equals(l) || "M".equals(l) || "B".equals(l) @@ -126,12 +126,52 @@ class TPair { || "G".equals(l))); } + /** Returns true if and only if this pair could be a Tibetan + * secondary sufffix. */ + boolean isPostSuffix() { + return (null != l + && ((null == r || "".equals(r)) + || "-".equals(r) + || "A".equals(r)) // DLC FIXME: though warn about GAMASA vs. GAMS + && ("S".equals(l) + || "D".equals(l))); + } + + /** Returns true if and only if this pair could be a Tibetan + * sufffix. DLC FIXME: ACIP specific, just like isPostSuffix() and isPrefix() */ + boolean isSuffix() { + return (null != l + && ((null == r || "".equals(r)) + || "-".equals(r) + || "A".equals(r)) + && ("S".equals(l) + || "G".equals(l) + || "D".equals(l) + || "M".equals(l) + || "'".equals(l) + || "B".equals(l) + || "NG".equals(l) + || "N".equals(l) + || "L".equals(l) + || "R".equals(l))); + } + /** Returns true if and only if this pair is merely a * disambiguator. */ boolean isDisambiguator() { return ("-".equals(r) && getLeft() == null); } + /** Yep, this works for TPairs. */ + public boolean equals(Object x) { + if (x instanceof TPair) { + TPair p = (TPair)x; + return ((getLeft() == p.getLeft() || (getLeft() != null && getLeft().equals(p.getLeft()))) + || (getRight() == p.getRight() || (getRight() != null && getRight().equals(p.getRight())))); + } + return false; + } + /** Returns an TPair that is like this pair except that it has * a "+" on the right if this pair is empty on the right and is * empty on the right if this pair has a disambiguator (i.e., a @@ -195,4 +235,11 @@ class TPair { if (null != x) sb.append(x); } } + + /** Returns true if this pair is surely the last pair in an ACIP + * stack. Stacking continues through (* . ) and (* . +), but + * stops anywhere else. */ + boolean endsACIPStack() { + return (getRight() != null && !"+".equals(getRight())); + } } diff --git a/source/org/thdl/tib/text/ttt/TPairList.java b/source/org/thdl/tib/text/ttt/TPairList.java index 6858734..6df7031 100644 --- a/source/org/thdl/tib/text/ttt/TPairList.java +++ b/source/org/thdl/tib/text/ttt/TPairList.java @@ -284,216 +284,216 @@ class TPairList { * syntax) to do so. If this list of pairs has something clearly * illegal in it, or is empty, or is merely a list of * disambiguators etc., then this returns null. Never returns an - * empty parse tree. */ + * empty parse tree. + */ public TParseTree getParseTree() { - TParseTree pt = new TParseTree(); + // We treat [(B . ), (G . +), (K . ), (T . A)] as if it could + // be {B+G+K+T} or {B}{G+K+T}; we handle prefixes specially + // this way. [(T . ), (G . +), (K . ), (T . A)] is clearly + // {T+G+K+TA} (and, DLC FIXME, we should warn that there are + // some pluses but not all) + // + // We don't care if T+G+K+T is in TMW or not -- there is no + // master list of stacks. + int sz = size(); - int firstPair = 0; for (int i = 0; i < sz; i++) { - - // We treat [(B . ), (G . +), (K . ), (T . A)] as if it - // could be {B+G+K+T} or {B}{G+K}{T} or {B+G+K}{T} or - // {B}{G+K+T} (modulo stack legality); we're conservative. - // (Though some stacks won't be legal.) + TPair p = get(i); + if (p.getLeft() == null && !"-".equals(p.getRight())) + return null; // clearly illegal. + if ("+".equals(p.getLeft())) + return null; // clearly illegal. + if (":".equals(p.getLeft())) + return null; // clearly illegal. + if ("m".equals(p.getLeft())) + return null; // clearly illegal. + if ("m:".equals(p.getLeft())) + return null; // clearly illegal. + } + TParseTree pt = new TParseTree(); + if (sz < 1) return null; + + // When we see a stretch of ACIP without a disambiguator or a + // vowel, that stretch is taken to be one stack unless it may + // be prefix-root or suffix-postsuffix or suffix/postsuffix-' + // -- the latter necessary because GAMS'I is GAM-S-'I, not + // GAM-S+'I. 'UR, 'US, 'ANG, 'AM, 'I, 'O, 'U -- all begin + // with '. So we can have zero, one, two, or three special + // break locations. (The kind that aren't special are the + // break after G in G-DAMS, or the break after G in GADAMS or + // GEDAMS.) + // + // If a nonnegative number appears in breakLocations[i], it + // means that pair i may or may not be stacked with pair i+1. + int nextBreakLoc = 0; + int breakLocations[] = { -1, -1, -1 }; + + boolean mayHavePrefix; + + // Handle the first pair specially -- it could be a prefix. + if (ddebug) System.out.println("i is " + 0); + if ((mayHavePrefix = get(0).isPrefix()) && null == get(0).getRight()) { + // special case: we must have a branch in the parse tree + // for the initial part of this pair list. For example, + // is DKHYA D+KH+YA or D-KH+YA? It depends on prefix + // rules (can KH+YA take a DA prefix?), so the parse tree + // includes both. + breakLocations[nextBreakLoc++] = 0; + } + + // stack numbers start at 1. + int stackNumber = (get(0).endsACIPStack()) ? 2 : 1; + // this starts at 0. + int stackStart = (get(0).endsACIPStack()) ? 1 : 0; + + int numeric = 0; // 1 means surely, 0 means we don't know yet, -1 means surely not + + for (int i = 1; i < sz; i++) { if (ddebug) System.out.println("i is " + i); TPair p = get(i); - if (p.getRight() == null && firstPair + 1 < sz) { - // Here's the ambiguity. Let's fill up sl. (B . ) (G - // . +) (K . A) could be {B+G+KA} or {BA}{G+KA}, so we - // go until we hit a vowel and then break into - // TPairLists. - int start = firstPair; - int blanks[] = new int[sz - start]; // we may not use all of this. - int j; - for (j = start; j < sz; j++) { - TPair pj = get(j); - boolean isBlank; - if (ddebug) System.out.println("right guy is " + pj.getRight()); - if (pj.isDisambiguator()) - blanks[j-start] = ALWAYS_STOP_STACKING; - else { - if (!(isBlank = (pj.getRight() == null)) && !"+".equals(pj.getRight())) { - if (ddebug) System.out.println("breaker breaker at j=" + j); - break; - } - blanks[j-start] = isBlank ? STOP_STACK : ALWAYS_KEEP_STACKING; - } - } - if (j >= sz) j = sz - 1; - blanks[j-start] = ALWAYS_STOP_STACKING; - - // get(j) [corresponding to blanks[j-i]] is - // the last pair in the ambiguous stretch; get(i) - // [corresponding to blanks[0]] is the first. - - // We'll end up doing 2**(j-i+1) (i.e., (1 << - // (j-i+1))) iterations. If that's going to be too - // many, let's just say there's no legal parse. FIXME: - // give a nice error message in this case. - if (ddebug) System.out.println("ddebug: we're going to do 2^" + (j-i+1) + " [or " + (1 << (j-i+1)) + "] wacky iterations!"); - if ((j-i+1) > 13) // if you don't use 13, then change PackageTest.testSlowestTshegBar(). - return null; - - boolean keepGoing = true; - TStackListList sll = new TStackListList(); - do { - // Add the stack list currently specified by - // blanks if all the stacks in it are legal. -// DLC DELETE { -// ArrayList x = new ArrayList((j-start+1)); -// for (int ii = 0; ii < (j-start+1); ii++) -// x.add(new Integer(blanks[ii])); -// } - TStackList sl = new TStackList(sz - start); - boolean illegal = false; - TPairList currentStack = new TPairList(); - for (int k = 0; k < j-start+1; k++) { - TPair pk = get(start + k); - if (!pk.isDisambiguator()) { - currentStack.add(pk.insideStack()); - if (blanks[k] == STOP_STACK) { - if (currentStack.isLegalTibetanOrSanskritStack()) - sl.add(currentStack.asStack()); - else { - illegal = true; - break; - } - currentStack = new TPairList(); - } - } - } - if (!illegal && !currentStack.isEmpty()) { - if (currentStack.isLegalTibetanOrSanskritStack()) { - TPairList stack = currentStack.asStack(); - if (ddebug) System.out.println("adding currentStack " + stack + " to sl " + sl); - sl.add(stack); - } else { - illegal = true; - } - } - if (!illegal) { - if (ddebug) System.out.println("adding sl " + sl + " to sll " + sll); - sll.add(sl); - } - - // Update blanks. Think of this as doing base 2 - // arithmetic where STOP_STACK is zero, - // KEEP_STACKING is one, and ALWAYS_KEEP_STACKING - // and ALWAYS_STOP_STACKING are digits we cannot - // modify. We'll end up doing 2^M iterations, - // where M is the number of fields in blanks that - // are not equal to ALWAYS_KEEP_STACKING or - // ALWAYS_STOP_STACKING. - keepGoing = false; - for (int k = j-start; k >= 0; k--) { - if (blanks[k] == STOP_STACK) { - keepGoing = true; - blanks[k] = KEEP_STACKING; - // reset all digits to the right of k to - // "zero": - for (int m = k + 1; m < j-start+1; m++) { - if (blanks[m] == KEEP_STACKING) - blanks[m] = STOP_STACK; - } - break; - } - } - } while (keepGoing); - if (sll.isEmpty()) - return null; // STXAL or shT+ZNAGN, e.g. - else { - if (ddebug) System.out.println("adding sll " + sll + " to parse tree " + pt); - pt.add(sll); - } - - if (ddebug) System.out.println("i is " + i + " and j is " + j + " and we are resetting so that i==j+1 next time."); - i = j; - firstPair = j + 1; - } else if ("+".equals(p.getRight())) { - // Keep firstPair where it is. + boolean nn; + if ((nn = p.isNumeric()) && ("+".equals(get(i-1).getRight()) + || "+".equals(p.getRight()))) + return null; // clearly illegal. You can't stack numbers. + if (nn) { + if (-1 == numeric) + return null; // you can't mix numbers and letters. + else if (0 == numeric) + numeric = 1; } else { - // Add all pairs in the range [firstPair, i]. Some - // pairs are stacks all by themselves, some pairs have - // '+' on the right and are thus just part of a stack. - // We'll add a whole number of stacks, though. - - // this is initialized to hold the max we might use: - TStackListList sll - = new TStackListList(i - firstPair + 1); + if (numeric == 1) + return null; // you can't mix numbers and letters. + else if (0 == numeric && !p.isDisambiguator()) + numeric = -1; + } - TPairList currentStack = new TPairList(); - for (int j = firstPair; j <= i; j++) { - TPair pj = get(j); - if (!pj.isDisambiguator()) { - currentStack.add(pj.insideStack()); - if (!"+".equals(pj.getRight())) { - if (currentStack.isLegalTibetanOrSanskritStack()) - sll.add(new TStackList(currentStack.asStack())); - else { - return null; - } - currentStack = new TPairList(); + if (i+1==sz || p.endsACIPStack()) { + if (/* the stack ending here might really be + suffix-postsuffix or + suffix-appendage or + suffix-postsuffix-appendage */ + (mayHavePrefix && (stackNumber == 2 || stackNumber == 3)) + || (!mayHavePrefix && (stackNumber == 2))) { + if (i > stackStart) { + if (get(stackStart).isSuffix() + && (get(stackStart+1).isPostSuffix() // suffix-postsuffix + || "'".equals(get(stackStart+1).getLeft()))) // suffix-appendage + breakLocations[nextBreakLoc++] = stackStart; + if (i > stackStart + 1) { + // three to play with, maybe it's + // suffix-postsuffix-appendage. + if (get(stackStart).isSuffix() + && get(stackStart+1).isPostSuffix() + && "'".equals(get(stackStart+2).getLeft())) + breakLocations[nextBreakLoc++] = stackStart+1; } } + // else no need to insert a breakLocation, we're + // breaking hard. } - if (!currentStack.isEmpty()) - throw new Error("how can this happen? currentStack is " + currentStack); - - if (!sll.isEmpty()) { - if (ddebug) System.out.println("adding sll " + sll + " to parse tree " + pt); - pt.add(sll); - firstPair = i + 1; - } // else you probably have {G--YA} or something as - // your tsheg bar. + if (/* the stack ending here might really be + postsuffix-appendage (e.g., GDAM-S'O) */ + (mayHavePrefix && (stackNumber == 3 || stackNumber == 4)) + || (!mayHavePrefix && (stackNumber == 3))) { + if (i == stackStart+1) { // because GDAM--S'O is illegal, and because it's 'ANG, not 'NG, 'AM, not 'M -- ' always ends the stack + if (get(stackStart).isPostSuffix() + && "'".equals(get(stackStart+1).getLeft())) + breakLocations[nextBreakLoc++] = stackStart; + } + } + ++stackNumber; + stackStart = i+1; } } + // DLC FIXME: we no longer need all these breakLocations -- we can handle SAM'AM'ANG + + // Now go from hard break (i.e., (* . VOWEL or -)) to hard + // break (and there's a hard break after the last pair, of + // course, even if it is (G . ) or (G . +) [the latter being + // hideously illegal]). Between the hard breaks, there will + // be 1, 2, or 4 (can you see why 8 isn't possible, though + // numBreaks can be 3?) possible parses. There are two of DGA + // in DGAMS'O -- D-GA and D+GA. There are 4 of MS'O in + // DGAMS'O -- M-S-'O, M-S+'O, M+S-'O, and M+S+'O. Add one + // TStackListList per hard break to pt, the parse tree. + int startLoc = 0; // which pair starts this hard break? + + // DLC FIXME: assert this + if ((breakLocations[1] >= 0 && breakLocations[1] <= breakLocations[0]) + || (breakLocations[2] >= 0 && breakLocations[2] <= breakLocations[1])) + throw new Error("breakLocations is monotonically increasing, ain't it?"); + + for (int i = 0; i < sz; i++) { + if (i+1 == sz || get(i).endsACIPStack()) { + TStackListList sll = new TStackListList(4); // maximum is 4. + + int numBreaks = 0; + int breakStart = -1; + for (int jj = 0; jj < breakLocations.length; jj++) { + if (breakLocations[jj] >= startLoc + && breakLocations[jj] <= i) { + if (breakStart < 0) + breakStart = jj; + ++numBreaks; + } + } + + // Count from [0, 1< 0) { + for (int j = 0; breakStart+j < 3; j++) { + if (k == breakLocations[breakStart+j] + && 1 == ((counter >> j) & 1)) { + if (!currentStack.isEmpty()) + sl.add(currentStack.asStack()); + currentStack = new TPairList(); + break; // shouldn't matter, but you never know + } + } + } + } + } + if (!sl.isEmpty()) { + sll.add(sl); + } + } + + if (!sll.isEmpty()) + pt.add(sll); + startLoc = i+1; + } + } + + if (pt.isEmpty()) return null; return pt; } - /** Returns true if and only if this list of TPairs can be - * interpreted as a legal Tibetan stack or a legal Tibetanized - * Sanskrit stack. This is private because a precondition is - * that no vowels or disambiguators appear except possibly in the - * final pair. */ - private boolean isLegalTibetanOrSanskritStack() { - StringBuffer tibetan = new StringBuffer(); - StringBuffer sanskrit = new StringBuffer(); - int sz = size(); - - // Special case because otherwise wa-zur alone would be seen - // as legal. - if (sz == 1 && "V".equals(get(0).getLeft())) - return false; - - for (int i = 0; i < sz; i++) { - TPair p = get(i); - String ewts_form - = ACIPRules.getWylieForACIPConsonant(p.getLeft()); - if (null == ewts_form) { - if (p.isNumeric()) - ewts_form = p.getLeft(); - } - if (null == ewts_form) { - if (ddebug) System.out.println("testing " + toString2() + " for legality said false. numeric?" + p.isNumeric() + "[1]"); - return false; - } - tibetan.append(ewts_form); - sanskrit.append(ewts_form); - if (i + 1 < sz) { - tibetan.append('-'); - sanskrit.append('+'); - } - } - boolean ans = - (TibetanMachineWeb.hasGlyph(tibetan.toString()) - || TibetanMachineWeb.hasGlyph(sanskrit.toString())); - if (ddebug) System.out.println("testing " + toString2() + " for legality said " + ans + " [2]; san is " + sanskrit + " tib is " + tibetan + "."); - return ans; - } private static final boolean ddebug = false; /** Mutates this TPairList object such that the last pair is @@ -611,9 +611,11 @@ class TPairList { } /** Appends the DuffCodes that correspond to this grapheme cluster - * to duff. Assumes this is one grapheme cluster. */ - void getDuff(ArrayList duff) { - int previousSize = duff.size(); + * to duffsAndErrors, or appends a String that is an error + * message saying that TMW cannot represent this grapheme + * cluster. */ + void getDuff(ArrayList duffsAndErrors) { + int previousSize = duffsAndErrors.size(); StringBuffer wylieForConsonant = new StringBuffer(); for (int x = 0; x + 1 < size(); x++) { wylieForConsonant.append(get(x).getWylie(false)); @@ -624,17 +626,18 @@ class TPairList { if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { hashKey = hashKey.replace('+', '-'); if (!TibetanMachineWeb.isKnownHashKey(hashKey)) { - throw new Error("How did this happen?"); + duffsAndErrors.add("[#ERROR The ACIP {" + recoverACIP() + "} cannot be represented with the TibetanMachine or TibetanMachineWeb fonts because no such glyph exists in these fonts.]"); + return; } } if (lastPair.getRight() == null || lastPair.equals("-")) { - duff.add(TibetanMachineWeb.getGlyph(hashKey)); + duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey)); } else { - ACIPRules.getDuffForACIPVowel(duff, + ACIPRules.getDuffForACIPVowel(duffsAndErrors, TibetanMachineWeb.getGlyph(hashKey), lastPair.getRight()); } - if (previousSize == duff.size()) + if (previousSize == duffsAndErrors.size()) throw new Error("TPairList with no duffs? " + toString()); // DLC FIXME: change to assertion. } } diff --git a/source/org/thdl/tib/text/ttt/TPairListFactory.java b/source/org/thdl/tib/text/ttt/TPairListFactory.java index 9264b6d..8e19629 100644 --- a/source/org/thdl/tib/text/ttt/TPairListFactory.java +++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java @@ -38,9 +38,42 @@ class TPairListFactory { * rest would be suboptimal, so we backtrack to [(T . )] and then * finally become [(T . ), (A . A)]. We look for (A . ) and ( * . ) in the rest in order to say "the rest would be - * suboptimal", i.e. we use TPairList.hasSimpleError() - * @param acip a string of ACIP with no punctuation in it */ - static TPairList breakACIPIntoChunks(String acip) { + * suboptimal", i.e. we use TPairList.hasSimpleError().

+ * + *

There is one case where we break things up into two pair + * lists -- I found out about this case too late to do anything + * clean about it. SNYAM'AM, e.g., breaks up into [(S . ), (NY + * . A), (M . 'A), (M . )], which is incorrect -- [(S . ), (NY + * . A), (M . ), (' . A), (M . )] is correct. But we don't know + * which is correct without parsing, so both are returned. The + * clean treatment (low-priority FIXME) would be to lex into a + * form that didn't insist 'A was either a vowel or a consonant. + * Then the parser would figure it out.

+ * + * @param acip a string of ACIP with no punctuation in it + * @return an array of one or two pair lists, if the former, then + * the second element will be null, if the latter, the second + * element will have (* . ), (' . *) instead of (* . '*) which + * the former has @throws IllegalArgumentException if acip is too + * large for us to break into chunks (we're recursive, not + * iterative, so the boundary can be increased a lot if you care, + * but you don't) */ + static TPairList[] breakACIPIntoChunks(String acip) throws IllegalArgumentException { + try { + TPairList a = breakHelper(acip, true); + TPairList b = breakHelper(acip, false); + if (a.equals(b)) + return new TPairList[] { a, null }; + else + return new TPairList[] { a, b }; + } catch (StackOverflowError e) { + throw new IllegalArgumentException("Input too large[1]: " + acip); + } catch (OutOfMemoryError e) { + throw new IllegalArgumentException("Input too large[2]: " + acip); + } + } + /** Helps {@link breakACIPIntoChunks(String)}. */ + private static TPairList breakHelper(String acip, boolean tickIsVowel) { // base case for our recursion: if ("".equals(acip)) @@ -50,9 +83,21 @@ class TPairListFactory { int howMuchBuf[] = new int[1]; TPair head = getFirstConsonantAndVowel(acipBuf, howMuchBuf); int howMuch = howMuchBuf[0]; + if (!tickIsVowel + && null != head.getLeft() + && null != head.getRight() + && head.getRight().startsWith("'")) { + head = new TPair(head.getLeft(), + // Without this disambiguator, we are + // less efficient (8 parses, not 4) and + // we can't handle PA'AM'ANG etc. + "-"); + howMuch = head.getLeft().length(); + } + TPairList tail; if ((tail - = breakACIPIntoChunks(acipBuf.substring(howMuch))).hasSimpleError()) { + = breakHelper(acipBuf.substring(howMuch), tickIsVowel)).hasSimpleError()) { for (int i = 1; i < howMuch; i++) { // try giving i characters back if that leaves us with // a legal head and makes the rest free of simple @@ -61,7 +106,7 @@ class TPairListFactory { TPair newHead; if ((newHead = head.minusNRightmostACIPCharacters(i)).isLegal() && !(newTail - = breakACIPIntoChunks(acipBuf.substring(howMuch - i))).hasSimpleError()) { + = breakHelper(acipBuf.substring(howMuch - i), tickIsVowel)).hasSimpleError()) { newTail.prepend(newHead); return newTail; } diff --git a/source/org/thdl/tib/text/ttt/TParseTree.java b/source/org/thdl/tib/text/ttt/TParseTree.java index 64c6ed1..841177b 100644 --- a/source/org/thdl/tib/text/ttt/TParseTree.java +++ b/source/org/thdl/tib/text/ttt/TParseTree.java @@ -184,10 +184,7 @@ class TParseTree { } /** Returns a list containing the unique legal parse of this parse - * tree if there is a unique legal parse. Note that {SRAS} has a - * unique legal parse, though {SRS} has two equally good parses; - * i.e., note that the {A} vowel is treated specially here - * (unlike in {@link #getLegalParses()}). Returns an empty list + * tree if there is a unique legal parse. Returns an empty list * if there are no legal parses. Returns a list containing all * legal parses if there two or more equally good parses. By * "legal", we mean a sequence of stacks that is legal @@ -223,13 +220,21 @@ class TParseTree { if (allStrictlyLegalParses.size() > 2) throw new Error("can this happen?"); if (legalParsesWithVowelOnRoot.size() == 2) { - if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size()) - throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1)); - return new TStackListList(legalParsesWithVowelOnRoot.get(1)); + if (legalParsesWithVowelOnRoot.get(0).size() + != 1 + legalParsesWithVowelOnRoot.get(1).size()) { + // MARDA is MAR+DA or MA-R-DA -- both are legal if + // noPrefixTests. + return new TStackListList(); + } else { + // G-YA vs. GYA. + return new TStackListList(legalParsesWithVowelOnRoot.get(1)); + } } if (allNonillegalParses.size() == 2) { - if (allNonillegalParses.get(0).size() != 1 + allNonillegalParses.get(1).size()) - throw new Error("Something other than the G-YA vs. GYA case appeared. Sorry for your trouble! " + allNonillegalParses.get(0) + " ;; " + allNonillegalParses.get(1)); + if (allNonillegalParses.get(0).size() != 1 + allNonillegalParses.get(1).size()) { + // BDREN, e.g., if noPrefixTests: + return new TStackListList(); + } return new TStackListList(allNonillegalParses.get(1)); } return allNonillegalParses; diff --git a/source/org/thdl/tib/text/ttt/TStackList.java b/source/org/thdl/tib/text/ttt/TStackList.java index 647ce54..e624bb6 100644 --- a/source/org/thdl/tib/text/ttt/TStackList.java +++ b/source/org/thdl/tib/text/ttt/TStackList.java @@ -131,7 +131,7 @@ class TStackList { * stack can take every prefix, which is not the case in * reality */ public BoolTriple isLegalTshegBar(boolean noPrefixTests) { - // DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal. + // DLC Should we handle PADMA and other Tibetanized Sanskrit fellows consistently? Right now we only treat single-stack Sanskrit guys as legal. TTGCList tgcList = new TTGCList(this); StringBuffer warnings = new StringBuffer(); @@ -191,8 +191,10 @@ class TStackList { * @param isLastStack if non-null, then isLastStack[0] will be * set to true if and only if the very last stack is the only * stack not to have a vowel or disambiguator on it */ + // DLC FIXME: DELETE THIS WARNING and this code unless EWTS will need it... boolean hasStackWithoutVowel(TPairList opl, boolean[] isLastStack) { int runningSize = 0; + // DLC FIXME: MARDA is MARD==MAR-D to us, but is probably MAR+DA, warn for (int i = 0; i < size(); i++) { TPairList pl = get(i); String l; @@ -207,7 +209,7 @@ class TStackList { } } if (runningSize != opl.sizeMinusDisambiguators()) - throw new IllegalArgumentException("opl (" + opl + ") is bad for this stack list (" + toString() + ")"); + throw new IllegalArgumentException("runningSize = " + runningSize + "; opl.sizeMinusDisambiguators = " + opl.sizeMinusDisambiguators() + "; opl (" + opl + ") is bad for this stack list (" + toString() + ")"); return false; } @@ -219,8 +221,11 @@ class TStackList { } return u.toString(); } - /** Returns the DuffCodes corresponding to this stack list. */ - DuffCode[] getDuff() { + /** Returns the DuffCodes and errors corresponding to this stack + list. Each element of the array is a DuffCode or a String, the + latter if and only if the TMW font cannot represent the + corresponding stack in this list. */ + Object[] getDuff() { ArrayList al = new ArrayList(size()*2); // rough estimate int count = 0; for (int i = 0; i < size(); i++) { @@ -229,20 +234,40 @@ class TStackList { if (size() > 0 && al.size() == 0) { throw new Error("But this stack list, " + this + ", contains " + size() + " stacks! How can it not have DuffCodes associated with it?"); } - return (DuffCode[])al.toArray(new DuffCode[] { }); + return al.toArray(); } } /** Too simple to comment. */ -class BoolTriple { +class BoolTriple implements Comparable { boolean isLegal; boolean isLegalButSanskrit; // some subset are legal but legal Sanskrit -- the single sanskrit stacks are this way, such as B+DE. boolean isLegalAndHasAVowelOnRoot; BoolTriple(boolean isLegal, boolean isLegalButSanskrit, boolean isLegalAndHasAVowelOnRoot) { + if (!isLegal && (isLegalButSanskrit || isLegalAndHasAVowelOnRoot)) + throw new IllegalArgumentException(); this.isLegal = isLegal; this.isLegalButSanskrit = isLegalButSanskrit; this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot; } + private int score() { + int score = 0; + if (isLegalAndHasAVowelOnRoot) { + score += 5; + } + if (isLegal) { + score += 5; + } + if (isLegalButSanskrit) { + score -= 3; + } + return score; + } + /** The most legal BoolTriple compares higher. */ + public int compareTo(Object o) { + BoolTriple b = (BoolTriple)o; + return score() - b.score(); + } } diff --git a/source/org/thdl/tib/text/ttt/package.html b/source/org/thdl/tib/text/ttt/package.html index db7b7fe..75c2fe6 100644 --- a/source/org/thdl/tib/text/ttt/package.html +++ b/source/org/thdl/tib/text/ttt/package.html @@ -25,6 +25,12 @@ Machine Web and methods for converting EWTS transliteration into Tibetan Machine Web.  It has extensive tests, though probably not mentioned in these Javadoc documents.

+

+When you see the term "Sanskrit" used here, it often means +non-native (not native Tibetan, in other words) rather than truly +Tibetanized Sanskrit.  It is overloaded to refer to Tibetanized +Chinese, Tibetanized Sanskrit, etc. +

Related Documentation

@see org.thdl.tib.text