TMW->Wylie conversion now takes advantage of prefix rules, i.e. the rules
that say "ya can take a ga prefix" and so on. The ACIP->Unicode converter now gives warnings (optionally, and by default they appear inline in the output). This converter now produces output even when lexical errors occur, but that output has the errors and warnings inline.
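For reference, here is a minimal sketch of the new warning-aware entry point added by this commit. It is based only on the signature shown in the diff below, convertToUnicode(String, StringBuffer, StringBuffer, boolean); the package path in the import and the sample input are assumptions for illustration, not something this diff confirms.

import org.thdl.tib.text.ttt.ACIPConverter; // assumed package path

public class AcipWarningsSketch {
    public static void main(String[] args) {
        StringBuffer errors = new StringBuffer();
        StringBuffer warnings = new StringBuffer();
        // Final argument: also write warnings inline into the converted output.
        String unicode = ACIPConverter.convertToUnicode("BSKYABS GRO", errors, warnings, true);
        if (unicode == null) {
            // Scanning or conversion errors occurred; details are in 'errors'.
            System.err.println("Errors:\n" + errors);
        } else {
            System.out.println(unicode);
            if (warnings.length() > 0)
                System.err.println("Warnings:\n" + warnings);
        }
    }
}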
parent 21ef657921
commit d5ad760230
14 changed files with 678 additions and 270 deletions
|
@ -58,28 +58,46 @@ public class ACIPConverter {
|
|||
ArrayList al = ACIPTshegBarScanner.scanFile(args[1], errors, strict, maxErrors - 1);
|
||||
|
||||
if (null == al) {
|
||||
System.err.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
|
||||
System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
|
||||
System.err.println("Tibetan or English input?");
|
||||
System.err.println("");
|
||||
System.err.println("First " + maxErrors + " errors scanning ACIP input file: ");
|
||||
System.err.println(errors);
|
||||
System.err.println("Exiting with " + maxErrors + " or more errors; please fix input file and try again.");
|
||||
if (false) {
|
||||
// Nobody wants to see this. FIXME: maybe somebody; have an option.
|
||||
System.err.println("First " + maxErrors + " lexical errors scanning ACIP input file: ");
|
||||
System.err.println(errors);
|
||||
}
|
||||
System.err.println("Exiting with " + maxErrors + " or more lexical errors; please fix input file and try again.");
|
||||
System.exit(1);
|
||||
}
|
||||
final boolean abortUponScanningError = false; // DLC MAKE ME CONFIGURABLE
|
||||
// DLC NOW: BAo isn't converting.
|
||||
if (errors.length() > 0) {
|
||||
System.err.println("Errors scanning ACIP input file: ");
|
||||
System.err.println(errors);
|
||||
System.err.println("Exiting; please fix input file and try again.");
|
||||
System.exit(1);
|
||||
if (abortUponScanningError) {
|
||||
System.err.println("Exiting; please fix input file and try again.");
|
||||
System.exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
convertToUnicode(al, System.out, errors);
|
||||
StringBuffer warnings = new StringBuffer();
|
||||
boolean putWarningsInOutput = true; // DLC make me configurable.
|
||||
convertToUnicode(al, System.out, errors, warnings,
|
||||
putWarningsInOutput);
|
||||
if (errors.length() > 0) {
|
||||
System.err.println("Errors converting ACIP input file: ");
|
||||
System.err.println(errors);
|
||||
System.err.println("The output contains these errors.");
|
||||
System.err.println("Exiting; please fix input file and try again.");
|
||||
System.exit(2);
|
||||
}
|
||||
if (warnings.length() > 0) {
|
||||
System.err.println("Warnings converting ACIP input file: ");
|
||||
System.err.println(warnings);
|
||||
if (putWarningsInOutput)
|
||||
System.err.println("The output contains these warnings.");
|
||||
System.exit(2);
|
||||
}
|
||||
if (verbose) System.err.println("Converted " + args[1] + " perfectly.");
|
||||
System.exit(0);
|
||||
}
|
||||
|
@ -96,19 +114,30 @@ public class ACIPConverter {
|
|||
{
|
||||
throw new Error("DLC UNIMPLEMENTED");
|
||||
}
|
||||
// DLC FIXME: sometimes { } is \u0F0B, and sometimes it is a
|
||||
// space. Treat it as a tsheg only when it appears after a
|
||||
// syllable or another tsheg.
|
||||
|
||||
/** Returns UTF-8 encoded Unicode. A bit indirect, so use this
|
||||
* for testing only if performance is a concern. If errors occur
|
||||
* in scanning the ACIP or in converting a tsheg bar, then they
|
||||
* are appended to errors if errors is non-null. Returns the
|
||||
* are appended to errors if errors is non-null, as well as
|
||||
* written to the result. If warnings occur in scanning the ACIP
|
||||
* or in converting a tsheg bar, then they are appended to
|
||||
* warnings if warnings is non-null, and they are written to the
|
||||
* result if writeWarningsToResult is true. Returns the
|
||||
* conversion upon perfect success, null if errors occurred.
|
||||
*/
|
||||
public static String convertToUnicode(String acip,
|
||||
StringBuffer errors) {
|
||||
StringBuffer errors,
|
||||
StringBuffer warnings,
|
||||
boolean writeWarningsToResult) {
|
||||
ByteArrayOutputStream sw = new ByteArrayOutputStream();
|
||||
ArrayList al = ACIPTshegBarScanner.scan(acip, errors, true /* DLC FIXME */, -1);
|
||||
try {
|
||||
if (null != al && convertToUnicode(al, sw, errors)) {
|
||||
if (null != al
|
||||
&& convertToUnicode(al, sw, errors,
|
||||
warnings, writeWarningsToResult)) {
|
||||
return sw.toString("UTF-8");
|
||||
} else {
|
||||
System.out.println("DLC al is " + al + " and convertToUnicode returned null.");
|
||||
|
@ -119,15 +148,25 @@ public class ACIPConverter {
|
|||
}
|
||||
}
|
||||
|
||||
/** Writes Unicode to out. If errors occur in converting a
|
||||
* tsheg bar, then they are appended to errors if errors is
|
||||
* non-null. Returns true upon perfect success, false if errors
|
||||
* occurred.
|
||||
/** Writes Unicode to out. If errors occur in converting a tsheg
|
||||
* bar, then they are appended to errors if errors is non-null.
|
||||
* Furthermore, errors are written to out. If writeWarningsToOut
|
||||
* is true, then warnings also will be written to out. Returns
|
||||
* true upon perfect success, false if errors occurred.
|
||||
* @param scan result of ACIPTshegBarScanner.scan(..)
|
||||
* @param out stream to which to write converted text
|
||||
* @param errors if non-null, all error messages are appended
|
||||
* @param warnings if non-null, all warning messages are appended
|
||||
* to this
|
||||
* @param writeWarningsToOut if true, then all warning messages
|
||||
* are written to out in the appropriate places
|
||||
* @throws IOException if we cannot write to out
|
||||
*/
|
||||
public static boolean convertToUnicode(ArrayList scan,
|
||||
OutputStream out,
|
||||
StringBuffer errors)
|
||||
StringBuffer errors,
|
||||
StringBuffer warnings,
|
||||
boolean writeWarningsToOut)
|
||||
throws IOException
|
||||
{
|
||||
int sz = scan.size();
|
||||
|
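The String overload above wraps a lower-level, stream-based pipeline: scan the ACIP into tsheg bars, then convert the scan result to an OutputStream. Below is a sketch of driving that pipeline directly, again based only on the signatures visible in this diff (ACIPTshegBarScanner.scan(String, StringBuffer, boolean, int) and convertToUnicode(ArrayList, OutputStream, StringBuffer, StringBuffer, boolean)); the import paths and sample input are assumptions.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import org.thdl.tib.text.ttt.ACIPConverter;       // assumed package path
import org.thdl.tib.text.ttt.ACIPTshegBarScanner; // assumed package path

public class StreamConversionSketch {
    public static void main(String[] args) throws IOException {
        StringBuffer errors = new StringBuffer();
        StringBuffer warnings = new StringBuffer();
        // Third argument mirrors the 'strict' flag used above; -1 means no cap on errors.
        ArrayList scan = ACIPTshegBarScanner.scan("BSKYABS GRO", errors, true, -1);
        if (scan == null) {
            System.err.println("Too many lexical errors:\n" + errors);
            return;
        }
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        // Returns true on perfect success; errors (and, with the final argument
        // true, warnings) are also written inline into 'out'.
        boolean perfect = ACIPConverter.convertToUnicode(scan, out, errors, warnings, true);
        System.out.println(out.toString("UTF-8"));
        if (!perfect) System.err.println("Errors:\n" + errors);
        if (warnings.length() > 0) System.err.println("Warnings:\n" + warnings);
    }
}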
@ -139,7 +178,7 @@ public class ACIPConverter {
|
|||
int stype = s.getType();
|
||||
if (stype == ACIPString.ERROR) {
|
||||
hasErrors = true;
|
||||
writer.write("[#ERROR CONVERTING ACIP DOCUMENT: ");
|
||||
writer.write("[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: ");
|
||||
writer.write(s.getText());
|
||||
writer.write("]");
|
||||
} else {
|
||||
|
@ -179,6 +218,21 @@ public class ACIPConverter {
|
|||
if (null != errors)
|
||||
errors.append(errorMessage + "\n");
|
||||
} else {
|
||||
String warning
|
||||
= pt.getWarning(false, // DLC: make me configurable
|
||||
pl,
|
||||
s.getText());
|
||||
if (null != warning) {
|
||||
if (writeWarningsToOut) {
|
||||
writer.write("[#WARNING CONVERTING ACIP DOCUMENT: ");
|
||||
writer.write(warning);
|
||||
writer.write("]");
|
||||
}
|
||||
if (null != warnings) {
|
||||
warnings.append(warning);
|
||||
warnings.append('\n');
|
||||
}
|
||||
}
|
||||
unicode = sl.getUnicode();
|
||||
if (null == unicode) throw new Error("DLC: HOW?");
|
||||
}
|
||||
|
|
|
@ -133,16 +133,18 @@ public class ACIPTshegBarScanner {
|
|||
Stack bracketTypeStack = new Stack();
|
||||
int startSlashIndex = -1;
|
||||
int startParenIndex = -1;
|
||||
int numNewlines = 0;
|
||||
for (int i = 0; i < sl; i++) {
|
||||
if (i < startOfString) throw new Error("bad reset");
|
||||
char ch;
|
||||
ch = s.charAt(i);
|
||||
if (ch == '\n') ++numNewlines;
|
||||
if (ACIPString.COMMENT == currentType && ch != ']') {
|
||||
if ('[' == ch) {
|
||||
al.add(new ACIPString("Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
|
@ -157,17 +159,18 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
|
||||
al.add(new ACIPString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
|
||||
ACIPString.ERROR));
|
||||
if (!waitingForMatchingIllegalClose) {
|
||||
if (null != errors) {
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a truly unmatched close bracket, ] or }.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
waitingForMatchingIllegalClose = false;
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
|
@ -249,6 +252,11 @@ public class ACIPTshegBarScanner {
|
|||
|| s.substring(i, i + "[BP]".length()).equals("{BP}"))) {
|
||||
thingy = "[BP]";
|
||||
currentType = ACIPString.BP;
|
||||
} else if (i + "[BLANK PAGE]".length() <= sl
|
||||
&& (s.substring(i, i + "[BLANK PAGE]".length()).equals("[BLANK PAGE]")
|
||||
|| s.substring(i, i + "[BLANK PAGE]".length()).equals("{BLANK PAGE}"))) {
|
||||
thingy = "[BLANK PAGE]";
|
||||
currentType = ACIPString.BP;
|
||||
} else if (i + "[ BP ]".length() <= sl
|
||||
&& (s.substring(i, i + "[ BP ]".length()).equals("[ BP ]")
|
||||
|| s.substring(i, i + "[ BP ]".length()).equals("{ BP }"))) {
|
||||
|
@ -414,11 +422,11 @@ public class ACIPTshegBarScanner {
|
|||
// This is an error. Sometimes [COMMENTS APPEAR
|
||||
// WITHOUT # MARKS]. Though "... [" could cause
|
||||
// this too.
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
al.add(new ACIPString("Found an illegal open bracket: " + s.substring(i, i+1),
|
||||
ACIPString.ERROR));
|
||||
if (waitingForMatchingIllegalClose) {
|
||||
if (null != errors) {
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
|
@ -435,7 +443,7 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal open bracket (in context, this is " + inContext + "). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
|
@ -477,7 +485,6 @@ public class ACIPTshegBarScanner {
|
|||
if (i+numdigits+2 < sl && s.charAt(i+numdigits+2) == '.') {
|
||||
if (!(i+numdigits+4 < sl && isNumeric(s.charAt(i+numdigits+3))
|
||||
&& !isNumeric(s.charAt(i+numdigits+4)))) {
|
||||
al.add(new ACIPString(s.substring(i, i+numdigits+3), ACIPString.ERROR));
|
||||
String inContext = s.substring(i, i+Math.min(sl-i, 10));
|
||||
if (inContext.indexOf("\r") >= 0) {
|
||||
inContext = inContext.substring(0, inContext.indexOf("\r"));
|
||||
|
@ -488,8 +495,10 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker has a period, '.', at the end of it, which is illegal.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+numdigits+3;
|
||||
|
@ -498,7 +507,6 @@ public class ACIPTshegBarScanner {
|
|||
break;
|
||||
}
|
||||
if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' || s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) {
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
|
||||
String inContext = s.substring(i, i+Math.min(sl-i, 10));
|
||||
if (inContext.indexOf("\r") >= 0) {
|
||||
inContext = inContext.substring(0, inContext.indexOf("\r"));
|
||||
|
@ -509,8 +517,10 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). This folio marker is not followed by whitespace, as is expected.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1; // DLC FIXME: skip over more?
|
||||
|
@ -572,7 +582,9 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
|
||||
// This case, @NNN, must come after the @NNN{AB} case.
|
||||
if (i+numdigits+1 < sl && s.charAt(i+numdigits+1) == ' ') {
|
||||
if (i+numdigits+1 < sl && (s.charAt(i+numdigits+1) == ' '
|
||||
|| s.charAt(i+numdigits+1) == '\n'
|
||||
|| s.charAt(i+numdigits+1) == '\r')) {
|
||||
boolean allAreNumeric = true;
|
||||
for (int k = 1; k <= numdigits; k++) {
|
||||
if (!isNumeric(s.charAt(i+k))) {
|
||||
|
@ -591,7 +603,6 @@ public class ACIPTshegBarScanner {
|
|||
}
|
||||
}
|
||||
if (startOfString == i) {
|
||||
al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
|
||||
String inContext = s.substring(i, i+Math.min(sl-i, 10));
|
||||
if (inContext.indexOf("\r") >= 0) {
|
||||
inContext = inContext.substring(0, inContext.indexOf("\r"));
|
||||
|
@ -602,8 +613,10 @@ public class ACIPTshegBarScanner {
|
|||
inContext = inContext + "...";
|
||||
}
|
||||
}
|
||||
al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal at sign, @ (in context, this is " + inContext + "). @012B is an example of a legal folio marker.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
|
@ -626,9 +639,10 @@ public class ACIPTshegBarScanner {
|
|||
* it means /NYA/. We warn about // for this
|
||||
* reason. \\ causes a tsheg-bar error (DLC
|
||||
* FIXME: verify this is so). */
|
||||
al.add(new ACIPString("//", ACIPString.ERROR));
|
||||
al.add(new ACIPString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
|
||||
ACIPString.ERROR));
|
||||
if (errors != null) {
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
|
@ -661,9 +675,10 @@ public class ACIPTshegBarScanner {
|
|||
|
||||
if (startParenIndex >= 0) {
|
||||
if (ch == '(') {
|
||||
al.add(new ACIPString("Nesting of parentheses () is not allowed", ACIPString.ERROR));
|
||||
al.add(new ACIPString("Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
} else {
|
||||
|
@ -674,9 +689,10 @@ public class ACIPTshegBarScanner {
|
|||
currentType = ACIPString.ERROR;
|
||||
} else {
|
||||
if (ch == ')') {
|
||||
al.add(new ACIPString("Unexpected closing parenthesis )", ACIPString.ERROR));
|
||||
al.add(new ACIPString("Unexpected closing parenthesis, ), found.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Unexpected closing parenthesis, ), found.\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
} else {
|
||||
|
@ -724,10 +740,10 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.TIBETAN_PUNCTUATION));
|
||||
} else {
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + ": "
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n");
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
}
|
||||
|
@ -772,19 +788,24 @@ public class ACIPTshegBarScanner {
|
|||
al.add(new ACIPString(s.substring(startOfString, i),
|
||||
currentType));
|
||||
}
|
||||
al.add(new ACIPString(s.substring(i, i+1),
|
||||
ACIPString.ERROR));
|
||||
if (null != errors) {
|
||||
if ((int)ch == 65533) {
|
||||
errors.append("Offset " + i + ": "
|
||||
if ((int)ch == 65533) {
|
||||
al.add(new ACIPString("Found an illegal, unprintable character.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal, unprintable character.\n");
|
||||
} else if ('\\' == ch) {
|
||||
errors.append("Offset " + i + ": "
|
||||
} else if ('\\' == ch) {
|
||||
al.add(new ACIPString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n");
|
||||
} else {
|
||||
errors.append("Offset " + i + ": "
|
||||
} else {
|
||||
al.add(new ACIPString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
|
||||
ACIPString.ERROR));
|
||||
if (null != errors)
|
||||
errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
|
||||
+ "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
|
||||
}
|
||||
}
|
||||
if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
|
||||
startOfString = i+1;
|
||||
|
|
|
@ -128,7 +128,7 @@ public class PackageTest extends TestCase {
|
|||
}
|
||||
|
||||
{
|
||||
TStackListList legalParses = pt.getUniqueParse();
|
||||
TStackListList legalParses = pt.getUniqueParse(false);
|
||||
boolean goodness2 = (expectedLegalParses == null
|
||||
|| expectedLegalParses.length == legalParses.size());
|
||||
for (int i = 0 ; i < legalParses.size(); i++) {
|
||||
|
@ -139,18 +139,21 @@ public class PackageTest extends TestCase {
|
|||
|| expectedLegalParses.length < i+1
|
||||
|| n.equals(expectedLegalParses[i]));
|
||||
if (!okay || !goodness2)
|
||||
System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is " + expectedLegalParses[i]);
|
||||
System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is "
|
||||
+ ((i < expectedLegalParses.length)
|
||||
? expectedLegalParses[i]
|
||||
: "not present"));
|
||||
assertTrue(okay);
|
||||
}
|
||||
if (!goodness2)
|
||||
System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses.");
|
||||
System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses for ACIP " + acip + ".");
|
||||
assertTrue(goodness2);
|
||||
TStackListList allLegalParses = pt.getLegalParses();
|
||||
TStackListList decentParses = pt.getNonIllegalParses();
|
||||
if (pt.getBestParse() == null) {
|
||||
if (legalParses.size() == 0) {
|
||||
if (null != expectedBestParse && !"".equals(expectedBestParse)) {
|
||||
System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
|
||||
System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for ACIP {" + acip + "}");
|
||||
assertTrue(false);
|
||||
}
|
||||
System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
|
||||
|
@ -163,7 +166,7 @@ public class PackageTest extends TestCase {
|
|||
}
|
||||
} else {
|
||||
if (legalParses.size() > 1) {
|
||||
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
|
||||
System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for ACIP " + acip + ": " + legalParses);
|
||||
assertTrue(legalParses.size() == 2
|
||||
&& (legalParses.get(0).size()
|
||||
== 1 + legalParses.get(1).size()));
|
||||
|
@ -176,7 +179,7 @@ public class PackageTest extends TestCase {
|
|||
if (null != expectedBestParse) {
|
||||
boolean good = pt.getBestParse().equals(expectedBestParse);
|
||||
if (!good) {
|
||||
System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
|
||||
System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for ACIP {" + acip + "}");
|
||||
}
|
||||
assertTrue(good);
|
||||
}
|
||||
|
@ -229,6 +232,116 @@ public class PackageTest extends TestCase {
|
|||
* {@link TPairList#getACIPError()}, and {@link
|
||||
* TPairList#recoverACIP()}. */
|
||||
public void testBreakACIPIntoChunks() {
|
||||
tstHelper("GASN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BARMA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MARDA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BBA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BBLUGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRAG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRA'I"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRAL"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRANGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDREN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRI"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRIS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDROL"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BDRUG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BLCAG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BLCI"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BLKONG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BLNGA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BLNGAG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BMA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BMYOD"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BSALDA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BSAMS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BSEMS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BTSAMS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("BTSIMS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DDANG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DDAR"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DDRANGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DDRUG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DNAG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DNOGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DRBAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DRGYU"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DRTOG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DYA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("DYAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GDRA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GDRIM"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GGAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GGYUR"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GLTAR"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GLTUNG"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GMA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GMAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GMON"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRDEGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRDZU"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRGYA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRNAGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRTAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRTOGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRTZO"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRTZOD"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GRTZON"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GSLA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GSNAD"); // ambiguous with regard to prefix rules
|
||||
tstHelper("GZLA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MBA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MBA'"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MBI'I"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MHA'A"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRDA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRDO"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRDZOGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRGA"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRGAD"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRGAN"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRJES"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRJOD"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRTOGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRTOL"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRTZE'I"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MRTZIGS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSAM"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSGRIB"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSKYES"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSON"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSOS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSTAMS"); // ambiguous with regard to prefix rules
|
||||
tstHelper("MSTAN"); // ambiguous with regard to prefix rules
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// If you're not careful, you'll think GGYES is a legal
|
||||
// Tibetan tsheg bar and parse it as {G}{G+YE}{S}. But it's
|
||||
// Sanskrit, really, because GA doesn't take a GA prefix.
|
||||
// This doesn't occur in ACIP input files that I've seen, but
|
||||
// GGYI (S1000I.INC) and GGYUR (S5275MC4.ACT) do occur.
|
||||
tstHelper("GGYES", "{G}{G}{YE}{S}",
|
||||
new String[] { "{G}{G}{YE}{S}", "{G}{G+YE}{S}", "{G+G}{YE}{S}" },
|
||||
new String[] { },
|
||||
"{G+G}{YE}{S}");
|
||||
|
||||
tstHelper("DRUG", "{D}{RU}{G}",
|
||||
new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
|
||||
new String[] { "{D+RU}{G}" },
|
||||
"{D+RU}{G}");
|
||||
|
||||
|
||||
tstHelper("d+H+d+HA", "{d+}{H+}{d+}{HA}",
|
||||
new String[] { "{d+H+d+HA}" },
|
||||
new String[] { "{d+H+d+HA}" });
|
||||
|
||||
tstHelper("Gd+H+d+HA");
|
||||
|
||||
tstHelper("AUTPA", "{AU}{T}{PA}",
|
||||
new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
|
||||
new String[] { },
|
||||
|
@ -249,7 +362,8 @@ public class PackageTest extends TestCase {
|
|||
new String[] { "{G+R+VA}{'I}" });
|
||||
tstHelper("G-RVA'I", "{G-}{R}{VA}{'I}",
|
||||
new String[] { "{G}{R+VA}{'I}" },
|
||||
new String[] { "{G}{R+VA}{'I}" });
|
||||
new String[] { },
|
||||
"{G}{R+VA}{'I}");
|
||||
tstHelper("RVA", "{R}{VA}",
|
||||
new String[] { "{R+VA}" },
|
||||
new String[] { "{R+VA}" });
|
||||
|
@ -6967,8 +7081,8 @@ tstHelper("ZUR");
|
|||
"",
|
||||
"[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); // DLC FIXME
|
||||
shelp("PAS... LA",
|
||||
"Offset 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
|
||||
"[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
|
||||
"Offset 5 or maybe 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
|
||||
"[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
|
||||
shelp("PAS... LA",
|
||||
"",
|
||||
true,
|
||||
|
@ -6983,28 +7097,28 @@ tstHelper("ZUR");
|
|||
shelp("", "", "[]");
|
||||
shelp("[DD]", "");
|
||||
shelp("[",
|
||||
"Offset 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
shelp("{",
|
||||
"Offset 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
|
||||
shelp("DD", "");
|
||||
shelp("DD]",
|
||||
"Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
"Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
|
||||
shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
|
||||
shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
|
||||
shelp("/NYA/", "");
|
||||
shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
|
||||
shelp("[LS][# A [[[[[COMMENT][LS]",
|
||||
"Offset 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
"Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n"
|
||||
+ "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment. Brackets may not appear in comments.\n");
|
||||
shelp("[ILLEGAL COMMENT]",
|
||||
"Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
"Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
shelp("(BSKYABS GRO)", ""); // DLC WHAT ARE THESE FOR?
|
||||
shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
|
||||
shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (. Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n");
|
||||
shelp("(BA)(PA)NYA(CA)", "");
|
||||
shelp("NYAx", "");
|
||||
shelp("NYA x", "");
|
||||
|
@ -7033,9 +7147,9 @@ tstHelper("ZUR");
|
|||
shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
|
||||
shelp("[*NYA ", "Offset END: Unmatched open bracket found. A correction does not terminate.\n");
|
||||
shelp("?", "", "[QUESTION:{?}]");
|
||||
shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
|
||||
shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n");
|
||||
shelp("[* Correction with []]",
|
||||
"Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
"Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]). Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket. Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
|
||||
|
||||
// DLC FIXME: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line. Note that it's "PA", not "PA ", ending it. Autocorrect to the latter.
|
||||
|
||||
|
@ -7051,8 +7165,8 @@ tstHelper("ZUR");
|
|||
uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
|
||||
}
|
||||
shelp("K\\,",
|
||||
"Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{\\}, TIBETAN_PUNCTUATION:{,}]");
|
||||
"Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");
|
||||
|
||||
|
||||
shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR%}]");
|
||||
|
@ -7073,15 +7187,15 @@ tstHelper("ZUR");
|
|||
shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
|
||||
shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
|
||||
shelp("@19-20A",
|
||||
"Offset 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
|
||||
"[ERROR:{@}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur.
|
||||
"Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.\n",
|
||||
"[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A). @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]"); // DLC FIXME: yes it occurs in the kangyur.
|
||||
shelp("@[7B]", "");
|
||||
shelp("@012A.3KA",
|
||||
"",
|
||||
"[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
|
||||
shelp("@012A.34",
|
||||
"Offset 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
|
||||
"[ERROR:{@012A.}, TIBETAN_NON_PUNCTUATION:{34}]");
|
||||
"Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.\n",
|
||||
"[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34). This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
|
||||
shelp("@[07B]", "");
|
||||
shelp("@[00007B]", "");
|
||||
shelp("@7B", "");
|
||||
|
@ -7097,8 +7211,8 @@ tstHelper("ZUR");
|
|||
shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT
|
||||
shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
|
||||
shelp("//NYA\\\\",
|
||||
"Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"[START_SLASH:{/}, ERROR:{//}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{\\}, ERROR:{\\}]");
|
||||
"Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.\n",
|
||||
"[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly. Sorry! Please do complain to the maintainers.}]");
|
||||
|
||||
}
|
||||
private static void uhelp(String acip) {
|
||||
|
@ -7106,7 +7220,7 @@ tstHelper("ZUR");
|
|||
}
|
||||
private static void uhelp(String acip, String expectedUnicode) {
|
||||
StringBuffer errors = new StringBuffer();
|
||||
String unicode = ACIPConverter.convertToUnicode(acip, errors);
|
||||
String unicode = ACIPConverter.convertToUnicode(acip, errors, null, true);
|
||||
if (null == unicode) {
|
||||
if (null != expectedUnicode && "none" != expectedUnicode) {
|
||||
System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
|
||||
|
@ -8729,22 +8843,22 @@ tstHelper("shKA");
|
|||
}
|
||||
/* DLC FIXME: add test cases: from R0021F.ACE: ambiguous Tibetan/Sanskrit:
|
||||
|
||||
BDA' þþþþ
|
||||
B+DA þþþ
|
||||
DBANG þþþ
|
||||
D+BA þþþ
|
||||
DGA' þþþþ
|
||||
D+GA þþþ
|
||||
DGRA þþþ
|
||||
D+GRA þþþ
|
||||
DGYESþþþþþ
|
||||
D+GYA þþþ
|
||||
DMAR þþþþ
|
||||
D+MA þþþ
|
||||
GDA' þþþþ
|
||||
G+DA þþþ
|
||||
GNAD þþþþ
|
||||
G+NA þþþ
|
||||
MNA' þþþþ
|
||||
M+NA þþþ
|
||||
BDA'
|
||||
B+DA
|
||||
DBANG
|
||||
D+BA
|
||||
DGA'
|
||||
D+GA
|
||||
DGRA
|
||||
D+GRA
|
||||
DGYES
|
||||
D+GYA
|
||||
DMAR
|
||||
D+MA
|
||||
GDA'
|
||||
G+DA
|
||||
GNAD
|
||||
G+NA
|
||||
MNA'
|
||||
M+NA
|
||||
*/
|
||||
|
|
|
@ -520,7 +520,8 @@ class TPairList {
|
|||
* corresponds to exactly one Tibetan grapheme cluster (i.e.,
|
||||
* stack). Note that U+0F7F (ACIP {:}) is part of a stack, not a
|
||||
* stack all on its own. */
|
||||
void populateWithTGCPairs(ArrayList pl, ArrayList indexList, int index) {
|
||||
void populateWithTGCPairs(ArrayList pl,
|
||||
ArrayList indexList, int index) {
|
||||
int sz = size();
|
||||
if (sz == 0) {
|
||||
return;
|
||||
|
@ -540,8 +541,8 @@ class TPairList {
|
|||
// The last pair:
|
||||
TPair p = get(i);
|
||||
ThdlDebug.verify(!"+".equals(p.getRight()));
|
||||
int where;
|
||||
boolean add_U0F7F = false;
|
||||
int where;
|
||||
if (p.getRight() != null
|
||||
&& (where = p.getRight().indexOf(':')) >= 0) {
|
||||
// this ':' guy is his own TGCPair.
|
||||
|
@ -579,27 +580,21 @@ class TPairList {
|
|||
}
|
||||
TGCPair tp;
|
||||
indexList.add(new Integer(index));
|
||||
tp = new TGCPair(lWylie.toString()
|
||||
+ (hasNonAVowel
|
||||
? ACIPRules.getWylieForACIPVowel(p.getRight())
|
||||
: ""),
|
||||
tp = new TGCPair(lWylie.toString(),
|
||||
(hasNonAVowel
|
||||
? ACIPRules.getWylieForACIPVowel(p.getRight())
|
||||
: ""),
|
||||
(isNumeric
|
||||
? TGCPair.OTHER
|
||||
: (hasNonAVowel
|
||||
? (isSanskrit
|
||||
? TGCPair.SANSKRIT_WITH_VOWEL
|
||||
: (isTibetan
|
||||
? TGCPair.CONSONANTAL_WITH_VOWEL
|
||||
: TGCPair.OTHER))
|
||||
: (isSanskrit
|
||||
? TGCPair.SANSKRIT_WITHOUT_VOWEL
|
||||
: (isTibetan
|
||||
? TGCPair.CONSONANTAL_WITHOUT_VOWEL
|
||||
: TGCPair.OTHER)))));
|
||||
? TGCPair.TYPE_OTHER
|
||||
: (isSanskrit
|
||||
? TGCPair.TYPE_SANSKRIT
|
||||
: (isTibetan
|
||||
? TGCPair.TYPE_TIBETAN
|
||||
: TGCPair.TYPE_OTHER))));
|
||||
pl.add(tp);
|
||||
if (add_U0F7F) {
|
||||
indexList.add(new Integer(index));
|
||||
pl.add(new TGCPair("H", TGCPair.OTHER));
|
||||
pl.add(new TGCPair("H", null, TGCPair.TYPE_OTHER));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -91,7 +91,7 @@ class TParseTree {
|
|||
ParseIterator pi = getParseIterator();
|
||||
while (pi.hasNext()) {
|
||||
TStackList sl = pi.next();
|
||||
if (sl.isLegalTshegBar().isLegal) {
|
||||
if (sl.isLegalTshegBar(false).isLegal) {
|
||||
sll.add(sl);
|
||||
}
|
||||
}
|
||||
|
@ -118,12 +118,12 @@ class TParseTree {
|
|||
* a unique non-illegal parse, you get it. If there's not a
|
||||
* unique answer, null is returned. */
|
||||
// {TZANDRA} is not solved by this, DLC NOW. Solve PADMA PROBLEM!
|
||||
|
||||
// DLC by using this we can get rid of single-sanskrit-gc, eh?
|
||||
public TStackList getBestParse() {
|
||||
TStackListList up = getUniqueParse();
|
||||
TStackListList up = getUniqueParse(false);
|
||||
if (up.size() == 1)
|
||||
return up.get(0);
|
||||
|
||||
up = getNonIllegalParses();
|
||||
int sz = up.size();
|
||||
if (sz == 1) {
|
||||
|
@ -192,14 +192,17 @@ class TParseTree {
|
|||
* legal parses if there two or more equally good parses. By
|
||||
* "legal", we mean a sequence of stacks that is legal
|
||||
* by the rules of Tibetan tsheg bar syntax (sometimes called
|
||||
* spelling). */
|
||||
public TStackListList getUniqueParse() {
|
||||
* spelling).
|
||||
* @param noPrefixTests true if you want to pretend that every
|
||||
* stack can take every prefix, which is not the case in
|
||||
* reality */
|
||||
public TStackListList getUniqueParse(boolean noPrefixTests) {
|
||||
TStackListList allLegalParses = new TStackListList(2); // save memory
|
||||
TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
|
||||
ParseIterator pi = getParseIterator();
|
||||
while (pi.hasNext()) {
|
||||
TStackList sl = pi.next();
|
||||
BoolPair bpa = sl.isLegalTshegBar();
|
||||
BoolPair bpa = sl.isLegalTshegBar(noPrefixTests);
|
||||
if (bpa.isLegal) {
|
||||
if (bpa.isLegalAndHasAVowelOnRoot)
|
||||
legalParsesWithVowelOnRoot.add(sl);
|
||||
|
@ -253,13 +256,23 @@ class TParseTree {
|
|||
public String getWarning(boolean paranoid,
|
||||
TPairList pl,
|
||||
String originalACIP) {
|
||||
TStackListList up = getUniqueParse();
|
||||
|
||||
{
|
||||
TStackList bestParse = getBestParse();
|
||||
TStackListList noPrefixTestsUniqueParse = getUniqueParse(true);
|
||||
if (noPrefixTestsUniqueParse.size() == 1
|
||||
&& !noPrefixTestsUniqueParse.get(0).equals(bestParse)) {
|
||||
return "Warning: We're going with " + bestParse + ", but only because our knowledge of prefix rules says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
|
||||
}
|
||||
}
|
||||
|
||||
TStackListList up = getUniqueParse(false);
|
||||
if (null == up || up.size() != 1) {
|
||||
boolean isLastStack[] = new boolean[1];
|
||||
TStackListList nip = getNonIllegalParses();
|
||||
if (nip.size() != 1) {
|
||||
if (null == getBestParse()) {
|
||||
return "There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||
return "Warning: There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
|
||||
} else {
|
||||
if (getBestParse().hasStackWithoutVowel(pl, isLastStack)) {
|
||||
if (isLastStack[0]) {
|
||||
|
@ -269,7 +282,7 @@ class TParseTree {
|
|||
}
|
||||
}
|
||||
if (paranoid) {
|
||||
return "Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
|
||||
return "Warning: Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -125,15 +125,17 @@ class TStackList {
|
|||
* Tibetan syntax (sometimes called rules of spelling). If this
|
||||
* is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will
|
||||
* be true if and only if there is an explicit {A} vowel on the
|
||||
* root stack. */
|
||||
public BoolPair isLegalTshegBar() {
|
||||
// DLC handle PADMA and other Tibetanized Sanskrit fellows. Right now we only handle single-stack guys.
|
||||
* root stack.
|
||||
* @param noPrefixTests true if you want to pretend that every
|
||||
* stack can take every prefix, which is not the case in
|
||||
* reality */
|
||||
public BoolPair isLegalTshegBar(boolean noPrefixTests) {
|
||||
// DLC handle PADMA and other Tibetanized Sanskrit fellows consistently. Right now we only treat single-stack Sanskrit guys as legal.
|
||||
|
||||
TTGCList tgcList = new TTGCList(this);
|
||||
StringBuffer warnings = new StringBuffer();
|
||||
String candidateType
|
||||
= TibTextUtils.getClassificationOfTshegBar(tgcList, warnings);
|
||||
// System.out.println("DLC: " + toString() + " has candidateType " + candidateType + " and warnings " + warnings);
|
||||
= TibTextUtils.getClassificationOfTshegBar(tgcList, warnings, noPrefixTests);
|
||||
|
||||
// preliminary answer:
|
||||
boolean isLegal = (candidateType != "invalid");
|
||||
|
|