TMW->Wylie conversion now takes advantage of prefix rules, i.e., the rules that say "ya can take a ga prefix" etc.

The ACIP->Unicode converter now gives warnings (optionally, and by default, inline). This converter now produces output even when lexical errors occur, but the output has errors and warnings inline.
2003-08-23 22:03:37 +00:00 · 2003-08-23 22:03:37 +00:00 · d5ad760230
commit d5ad760230
parent 21ef657921
14 changed files with 678 additions and 270 deletions
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@@ -102,19 +102,23 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("gya");
        ensureKeysGiveCorrectWylie("g.ya");
        ensureKeysGiveCorrectWylie("bya");
-        ensureKeysGiveCorrectWylie("b.ya");
+        ensureKeysGiveCorrectWylie("b.ya", "baya");
        ensureKeysGiveCorrectWylie("mya");
-        ensureKeysGiveCorrectWylie("m.ya");
-        ensureKeysGiveCorrectWylie("'ya");
-        ensureKeysGiveCorrectWylie("'.ya", "'ya");
-        ensureKeysGiveCorrectWylie("dya");
-        ensureKeysGiveCorrectWylie("d.ya", "dya");
+        ensureKeysGiveCorrectWylie("m.ya", "maya");
+        ensureKeysGiveCorrectWylie("'ya", "'aya");
+        ensureKeysGiveCorrectWylie("'.ya", "'aya");
+        ensureKeysGiveCorrectWylie("dya",
+                                   "daya");
+        ensureKeysGiveCorrectWylie("d.ya",
+                                   "daya");
        ensureKeysGiveCorrectWylie("grwa");
-        ensureKeysGiveCorrectWylie("g.rwa");
+        ensureKeysGiveCorrectWylie("g.rwa",
+                                   "garwa");
        ensureKeysGiveCorrectWylie("gra");
        ensureKeysGiveCorrectWylie("dra");
        ensureKeysGiveCorrectWylie("drwa");
-        ensureKeysGiveCorrectWylie("d.rwa");
+        ensureKeysGiveCorrectWylie("d.rwa",
+                                   "darwa");
        ensureKeysGiveCorrectWylie("g.r", "gar");
        ensureKeysGiveCorrectWylie("d.r", "dar");
        ensureKeysGiveCorrectWylie("'.r", "'ar");
@@ -134,7 +138,7 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("t.sa",
                                   "tas");

-        ensureKeysGiveCorrectWylie("d.za");
+        ensureKeysGiveCorrectWylie("d.za", "daza");
        ensureKeysGiveCorrectWylie("dza");

        ensureKeysGiveCorrectWylie("s.ha",
@@ -219,7 +223,7 @@ public class DuffPaneTest extends TestCase {

        ensureKeysGiveCorrectWylie("b.lag");
        ensureKeysGiveCorrectWylie("blg",
-                                   "blga");
+                                   "balga");

        ensureKeysGiveCorrectWylie("b.las",
                                   "bals");
@@ -244,21 +248,24 @@ public class DuffPaneTest extends TestCase {
                                   "bras");
        ensureKeysGiveCorrectWylie("bras");

-        ensureKeysGiveCorrectWylie("d.wa");
+        ensureKeysGiveCorrectWylie("d.wa",
+                                   "dawa");
        ensureKeysGiveCorrectWylie("dawa",
-                                   "d.wa");
+                                   "dawa");
        ensureKeysGiveCorrectWylie("dwa");

-        ensureKeysGiveCorrectWylie("g.wa");
+        ensureKeysGiveCorrectWylie("g.wa",
+                                   "gawa");
        ensureKeysGiveCorrectWylie("gawa",
-                                   "g.wa");
+                                   "gawa");
        ensureKeysGiveCorrectWylie("gwa");

        ensureKeysGiveCorrectWylie("'.wa",
-                                   "'wa");
+                                   "'awa");
        ensureKeysGiveCorrectWylie("'awa",
-                                   "'wa");
-        ensureKeysGiveCorrectWylie("'wa");
+                                   "'awa");
+        ensureKeysGiveCorrectWylie("'wa",
+                                   "'awa");

        ensureKeysGiveCorrectWylie("gyg",
                                   "g.yag");
@@ -282,7 +289,8 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("ma.a.asa",
                                   "mas");

-        ensureKeysGiveCorrectWylie("'ka");
+        ensureKeysGiveCorrectWylie("'ka",
+                                   "'aka");

        ensureKeysGiveCorrectWylie("'gas");

@@ -319,8 +327,9 @@ public class DuffPaneTest extends TestCase {
                                   "lamanga");

        ensureKeysGiveCorrectWylie("b.m.ng",
-                                   "bmang");
-        ensureKeysGiveCorrectWylie("bmang");
+                                   "bamanga");
+        ensureKeysGiveCorrectWylie("bmang",
+                                   "bamanga");

        ensureKeysGiveCorrectWylie("gdams");
        ensureKeysGiveCorrectWylie("g.d.m.s.",
@@ -372,7 +381,7 @@ public class DuffPaneTest extends TestCase {
        ensureKeysGiveCorrectWylie("fivikikhigingicichijinyitithidinipiphibimitsitshidziwizhizi'iyirilishisihiTiThiDiNiShi");

        ensureKeysGiveCorrectWylie("don't touch my coffee/that makes me very angry/supersize my drink",
-                                   "dona'ata tocha mya cofafe/thata mkes me veraya angaraya/superasize mya drinaka");
+                                   "dona'ata tocha mya cofafe/thata makesa me veraya angaraya/superasize mya drinaka");

    }
 }
--- a/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected
+++ b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected
@@ -28,7 +28,7 @@ zur mig nyag phran tsam gyis dge ba'i gzugs can 'dus ma byas//\par
 \par
 yid 'ong bzhin ras zla gzhon 'khor lo gnyis skyes la//\par
 'khrul ba ster yang 'phyang mo sel byed mgo skyes kyi//\par
-bai DUr mthing kha'i lan bu rab 'phyang dbyangs can ma//\par
+bai DUra mthing kha'i lan bu rab 'phyang dbyangs can ma//\par
 smra ba'i dbang phyug ngag gi rgyal po nyer grub mdzod//\par
 \par
 gangs can lha lam yangs pa'i khyon 'dir rgyal ba'i bstan pa bcu gnyis bdag po'i gur khang mchog/\par
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@@ -25,7 +25,7 @@ package org.thdl.tib.text;
    context-insensitive THDL Extended Wylie representation.  NOTE
    WELL: this is not a real grapheme cluster; I'm misusing the term
    (FIXME).  It's actually whole or part of one.  It's part of one
-    when this is a vowel or U+0F7F alone.
+    when this is U+0F7F alone.

    @author David Chandler */
 public class TGCPair {
@@ -37,14 +37,84 @@ public class TGCPair {
    public static final int SANSKRIT_WITHOUT_VOWEL = 5;
    public static final int SANSKRIT_WITH_VOWEL = 6;

-    public String wylie;
-    public int classification;
-    public TGCPair(String wylie, int classification) {
-        this.wylie = wylie;
-        this.classification = classification;
+    public static final int TYPE_OTHER = 31;
+    public static final int TYPE_SANSKRIT = 32;
+    public static final int TYPE_TIBETAN = 33;
+
+    // Sanskrit or Tibetan consonant, or number, or oddball:
+    private String consonantWylie;
+    private String vowelWylie;
+    public String getConsonantWylie() {
+        return consonantWylie;
    }
+    public String getVowelWylie() {
+        return vowelWylie;
+    }
+    /** Cludge. */
+    public void setWylie(String x) {
+        consonantWylie = x;
+        vowelWylie = null;
+    }
+    public String getWylie() {
+        StringBuffer b = new StringBuffer();
+        if (consonantWylie != null) {
+            // we may have {p-y}, but the user wants to see {py}.
+            for (int i = 0; i < consonantWylie.length(); i++) {
+                char ch = consonantWylie.charAt(i);
+                if ('-' != ch)
+                    b.append(ch);
+            }
+        }
+        if (vowelWylie != null)
+            b.append(vowelWylie);
+        return b.toString();
+    }
+    public int classification;
+    /** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
+     *  consonantWylie and vowel vowelWylie.  Use
+     *  classification==TYPE_OTHER for numbers, lone vowels, marks,
+     *  etc.  Use classification==TYPE_TIBETAN for Tibetan (not
+     *  Tibetanized Sanskrit) and classification=TYPE_SANSKRIT for
+     *  Tibetanized Sanskrit. */
+    public TGCPair(String consonantWylie, String vowelWylie, int classification) {
+        if ("".equals(vowelWylie))
+            vowelWylie = null;
+        // Technically, we don't need the following check, but it's
+        // nice for consistency's sake.
+        if ("".equals(consonantWylie))
+            consonantWylie = null;
+
+        // DLC FIXME: for speed, make these assertions:
+        if (classification != TYPE_OTHER
+            && classification != TYPE_TIBETAN
+            && classification != TYPE_SANSKRIT) {
+            throw new IllegalArgumentException("Bad classification " + classification + ".");
+        }
+        int realClassification = -37;
+        if (vowelWylie == null && classification == TYPE_TIBETAN)
+            realClassification = CONSONANTAL_WITHOUT_VOWEL;
+        if (vowelWylie != null && classification == TYPE_TIBETAN)
+            realClassification = CONSONANTAL_WITH_VOWEL;
+        if (vowelWylie == null && classification == TYPE_SANSKRIT)
+            realClassification = SANSKRIT_WITHOUT_VOWEL;
+        if (vowelWylie != null && classification == TYPE_SANSKRIT)
+            realClassification = SANSKRIT_WITH_VOWEL;
+        if (consonantWylie == null) {
+            if (classification != TYPE_OTHER)
+                throw new IllegalArgumentException("That's the very definition of a lone vowel.");
+            realClassification = LONE_VOWEL;
+        } else {
+            if (classification == TYPE_OTHER)
+                realClassification = OTHER;
+        }
+
+        this.consonantWylie = consonantWylie;
+        this.vowelWylie = vowelWylie;
+        this.classification = realClassification;
+    }
+
    public String toString() {
-        return "<TGCPair wylie=" + wylie + " classification="
+        return "<TGCPair wylie=" + getWylie() + " classification="
            + classification + "/>";
    }
 }
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -25,6 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit;
 import java.io.*;

 import org.thdl.util.ThdlDebug;
+import org.thdl.tib.text.tshegbar.LegalTshegBar;
+import org.thdl.tib.text.tshegbar.UnicodeConstants;
+import org.thdl.tib.text.tshegbar.UnicodeUtils;

 /**
 * Provides methods for converting back and forth between Extended
@@ -846,86 +849,64 @@ public class TibTextUtils implements THDLWylieConstants {
        // sz is an overestimate (speeds us up, wastes some memory).
        TMWGCList gcs = new TMWGCList(sz);

-        StringBuffer buildingUpGc = new StringBuffer();
+        StringBuffer buildingUpVowel = new StringBuffer(); // for {cui}, we append to this guy twice.
+        String nonVowelWylie = null; // for the "c" in {cui}
+        int pairType = TGCPair.TYPE_OTHER;

-        boolean consonantal_with_vowel = false;
-        boolean buildingUpSanskrit = false;
        for (int i = 0; i < sz; i++) {
            DuffCode dc = (DuffCode)glyphList.get(i);
            String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
-            boolean containsWylieVowel = false;
            boolean buildingUpSanskritNext = false;
            if ((buildingUpSanskritNext
                 = TibetanMachineWeb.isWylieSanskritConsonantStack(wylie))
                || TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)) {
-                if (buildingUpGc.length() > 0) {
-                    gcs.add(new TGCPair(buildingUpGc.toString(),
-                                        consonantal_with_vowel
-                                        ? (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITH_VOWEL
-                                           : TGCPair.CONSONANTAL_WITH_VOWEL)
-                                        : (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                           : TGCPair.CONSONANTAL_WITHOUT_VOWEL)));
-                    buildingUpGc.delete(0, buildingUpGc.length());
+                if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
+                    gcs.add(new TGCPair(nonVowelWylie,
+                                        buildingUpVowel.toString(),
+                                        pairType));
+                    buildingUpVowel.delete(0, buildingUpVowel.length());
                }
-                buildingUpGc.append(wylie);
-                consonantal_with_vowel = false;
-                buildingUpSanskrit = buildingUpSanskritNext;
-            } else if ((containsWylieVowel
-                        = TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie))
+                // We want {p-y}, not {py}.
+                nonVowelWylie
+                    = TibetanMachineWeb.getHashKeyForGlyph(dc.getFontNum(), dc.getCharNum());
+                pairType = (buildingUpSanskritNext
+                            ? TGCPair.TYPE_SANSKRIT
+                            : TGCPair.TYPE_TIBETAN);
+            } else if (TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie)
                       || TibetanMachineWeb.isWylieAdornment(wylie)) {
-
-                if (buildingUpGc.length() > 0) {
-                    buildingUpGc.append(wylie);
-                    if (containsWylieVowel) {
-                        if (debug)
-                            System.out.println("DEBUG: with_vowel is true thanks to " + wylie);
-                        consonantal_with_vowel = true;
-                    }
-                    // do not clear; we might have {cui} or {hUM}, e.g.
-                } else {
-                    gcs.add(new TGCPair(wylie,
-                                        TGCPair.LONE_VOWEL));
-                    consonantal_with_vowel = false;
-                }
+                buildingUpVowel.append(wylie);
            } else {
                // number or weird thing:

-                if (buildingUpGc.length() > 0) {
-                    gcs.add(new TGCPair(buildingUpGc.toString(),
-                                        consonantal_with_vowel
-                                        ? (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITH_VOWEL
-                                           : TGCPair.CONSONANTAL_WITH_VOWEL)
-                                        : (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                           : TGCPair.CONSONANTAL_WITHOUT_VOWEL)));
-                    buildingUpGc.delete(0, buildingUpGc.length());
+                if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
+                    gcs.add(new TGCPair(nonVowelWylie,
+                                        buildingUpVowel.toString(),
+                                        pairType));
+                    buildingUpVowel.delete(0, buildingUpVowel.length());
+                    nonVowelWylie = null;
                }
-                gcs.add(new TGCPair(wylie, TGCPair.OTHER));
-                consonantal_with_vowel = false;
-                buildingUpSanskrit = false;
+                gcs.add(new TGCPair(wylie, null, TGCPair.TYPE_OTHER));
+                pairType = TGCPair.TYPE_OTHER;
            }
        }
-        if (buildingUpGc.length() > 0) {
-            gcs.add(new TGCPair(buildingUpGc.toString(),
-                                consonantal_with_vowel
-                                ? (buildingUpSanskrit
-                                   ? TGCPair.SANSKRIT_WITH_VOWEL
-                                   : TGCPair.CONSONANTAL_WITH_VOWEL)
-                                : (buildingUpSanskrit
-                                   ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                   : TGCPair.CONSONANTAL_WITHOUT_VOWEL)));
+        if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
+            gcs.add(new TGCPair(nonVowelWylie,
+                                buildingUpVowel.toString(),
+                                pairType));
        }
-        buildingUpGc = null;
        return gcs;
    }


+    /** Returns a string that classifies gcs as a legal Tibetan tsheg
+     *  bar, a single Sanskrit grapheme cluster
+     *  ("single-sanskrit-gc"), or invalid ("invalid").  If
+     *  noPrefixTests is true, then ggyi will be seen as a
+     *  "prefix-root", even though gya doesn't take a ga prefix. */
    public static String getClassificationOfTshegBar(TGCList gcs,
                                                     // DLC the warnings are Wylie-specific
-                                                     StringBuffer warnings) {
+                                                     StringBuffer warnings,
+                                                     boolean noPrefixTests) {
        String candidateType = null;
        // Now that we have grapheme clusters, see if they match any
        // of the "legal tsheg bars":
@@ -937,10 +918,11 @@ public class TibTextUtils implements THDLWylieConstants {
                || TGCPair.SANSKRIT_WITH_VOWEL == cls)
                return "single-sanskrit-gc";
        }
+        TGCPair lastPair = null;
        for (int i = 0; i < sz; i++) {
            TGCPair tp = gcs.get(i);
            int cls = tp.classification;
-            String wylie = tp.wylie;
+            String wylie = tp.getWylie();
            if (TGCPair.OTHER == cls) {
                if (TibetanMachineWeb.isWylieNumber(wylie)) {
                    if (null == candidateType) {
@@ -977,25 +959,44 @@ public class TibTextUtils implements THDLWylieConstants {
                            // peek ahead to distinguish between ba's,
                            // ba'ala and ba'am:
                            TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                            if (isAppendageNonVowelWylie(nextwylie)) {
                                candidateType = "maybe-appendaged-prefix/root";
                            } else {
-                                candidateType = "prefix/root-root/suffix";
+                                if (noPrefixTests
+                                    || isLegalPrefixRootCombo(lastPair.getConsonantWylie(),
+                                                              tp.getConsonantWylie()))
+                                    candidateType = "prefix/root-root/suffix";
+                                else
+                                    candidateType = "root-suffix";
                            }
                        } else if (TibetanMachineWeb.isWylieRight(wylie)) {
-                            candidateType = "prefix/root-root/suffix";
+                            if (noPrefixTests
+                                || isLegalPrefixRootCombo(lastPair.getConsonantWylie(),
+                                                          tp.getConsonantWylie()))
+                                candidateType = "prefix/root-root/suffix";
+                            else
+                                candidateType = "root-suffix";
                        } else if (TibetanMachineWeb.isWylieAchungAppendage(wylie)) {
                            candidateType = "appendaged-prefix/root";
                        } else {
-                            candidateType = "prefix-root";
+                            if (noPrefixTests
+                                || isLegalPrefixRootCombo(lastPair.getConsonantWylie(),
+                                                          tp.getConsonantWylie()))
+                                candidateType = "prefix-root";
+                            else {
+                                if (null != warnings)
+                                    warnings.append("Found what would be a prefix-root combo, but the root stack with wylie " + wylie + " does not take the prefix with wylie " + lastPair.getConsonantWylie());
+                                candidateType = "invalid";
+                                break;
+                            }
                        }
                    } else if ("root" == candidateType) {
                        if (ACHUNG.equals(wylie)) {
                            // peek ahead to distinguish between pa's,
                            // pa'ala and pa'am:
                            TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                            if (isAppendageNonVowelWylie(nextwylie)) {
                                candidateType = "maybe-appendaged-root";
                            } else {
@@ -1016,7 +1017,7 @@ public class TibTextUtils implements THDLWylieConstants {
                            // peek ahead to distinguish between bpa's,
                            // bpa'ala and bpa'am:
                            TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                            if (isAppendageNonVowelWylie(nextwylie)) {
                                candidateType = "maybe-appendaged-prefix-root";
                            } else {
@@ -1038,7 +1039,7 @@ public class TibTextUtils implements THDLWylieConstants {
                            // peek ahead to distinguish between
                            // gga'am and gaga'ala:
                            TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                            if (isAppendageNonVowelWylie(nextwylie)) {
                                candidateType = "maybe-appendaged-prefix/root-root/suffix";
                            } else {
@@ -1120,7 +1121,11 @@ public class TibTextUtils implements THDLWylieConstants {
                            candidateType
                                = candidateType.substring("maybe-".length()).intern();
                            // So that we get 'am, not 'm; 'ang, not 'ng:
-                            tp.wylie = WYLIE_aVOWEL + tp.wylie;
+
+                            // FIXME: cludge: weird place to do this.
+                            // pa'am, not pa'm is what we want, sure,
+                            // but doing this here is ugly.
+                            tp.setWylie(WYLIE_aVOWEL + tp.getWylie());
                        } else {
                            if (null != warnings)
                                warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n");
@@ -1157,6 +1162,7 @@ public class TibTextUtils implements THDLWylieConstants {
            } else {
                throw new Error("bad cls");
            }
+            lastPair = tp;
        }
        if (candidateType.startsWith("maybe-appendaged-")) {
            if (null != warnings)
@@ -1221,7 +1227,7 @@ public class TibTextUtils implements THDLWylieConstants {
                                         StringBuffer wylieBuffer) {
        TGCList gcs
            = breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie);
-        String candidateType = getClassificationOfTshegBar(gcs, warnings);
+        String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
        int sz = gcs.size();
        if (candidateType == "invalid"
            || candidateType == "single-sanskrit-gc") {
@@ -1237,7 +1243,7 @@ public class TibTextUtils implements THDLWylieConstants {
            for (int i = 0; i < sz; i++) {
                TGCPair tp = (TGCPair)gcs.get(i);
                int cls = tp.classification;
-                String wylie = tp.wylie;
+                String wylie = tp.getWylie();
                wylieBuffer.append(wylie);
                if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
                    || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
@@ -1290,9 +1296,9 @@ public class TibTextUtils implements THDLWylieConstants {
                leftover = 3;
                /* FIXME: these constants are hard-wired here, rather
                 * than in TibetanMachineWeb, because I'm lazy. */
-                String wylie1 = ((TGCPair)gcs.get(0)).wylie;
-                String wylie2 = ((TGCPair)gcs.get(1)).wylie;
-                String wylie3 = ((TGCPair)gcs.get(2)).wylie;
+                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
+                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
+                String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
                if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
                    || (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
                    || (wylie1.equals("b") && wylie2.equals("d"))
@@ -1316,7 +1322,7 @@ public class TibTextUtils implements THDLWylieConstants {
                       || "prefix/root" == candidateType
                       || "root-suffix-postsuffix" == candidateType
                       || "root-suffix" == candidateType) {
-                String wylie1 = ((TGCPair)gcs.get(0)).wylie;
+                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
                leftover = 1;
                wylieBuffer.append(wylie1);
                if (((TGCPair)gcs.get(0)).classification
@@ -1330,16 +1336,16 @@ public class TibTextUtils implements THDLWylieConstants {
                }
                if ("root-suffix-postsuffix" == candidateType) {
                    leftover = 3;
-                    String wylie2 = ((TGCPair)gcs.get(1)).wylie;
-                    String wylie3 = ((TGCPair)gcs.get(2)).wylie;
+                    String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
+                    String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
                    wylieBuffer.append(unambiguousPostAVowelWylie(wylie2,
                                                                  wylie3));
                }
            } else if ("prefix-root-suffix" == candidateType
                       || "prefix-root" == candidateType
                       || "prefix-root-suffix-postsuffix" == candidateType) {
-                String wylie1 = ((TGCPair)gcs.get(0)).wylie;
-                String wylie2 = ((TGCPair)gcs.get(1)).wylie;
+                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
+                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
                leftover = 2;
                if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
                    wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
@@ -1357,8 +1363,8 @@ public class TibTextUtils implements THDLWylieConstants {
                }
                if ("prefix-root-suffix-postsuffix" == candidateType) {
                    leftover = 4;
-                    String wylie3 = ((TGCPair)gcs.get(2)).wylie;
-                    String wylie4 = ((TGCPair)gcs.get(3)).wylie;
+                    String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
+                    String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
                    wylieBuffer.append(unambiguousPostAVowelWylie(wylie3,
                                                                  wylie4));
                }
@@ -1371,15 +1377,15 @@ public class TibTextUtils implements THDLWylieConstants {
            // append the wylie left over:
            for (int i = leftover; i < sz; i++) {
                TGCPair tp = (TGCPair)gcs.get(i);
-                String wylie = tp.wylie;
+                String wylie = tp.getWylie();
                wylieBuffer.append(wylie);
            }
        }
    }

 /**
-* Gets the Extended Wylie for a sequence of glyphs using Chandler's
-* experimental method.  This works as follows:
+* Gets the Extended Wylie for a sequence of glyphs.  This works as
+* follows:
 *
 * <p>We run along until we hit whitespace or punctuation.  We take
 * everything before that and we see if it's a legal Tibetan tsheg bar,
@@ -1480,4 +1486,90 @@ public class TibTextUtils implements THDLWylieConstants {
        }
        return rv;
    }
+
+    /** Returns true if and only if the stack with Wylie <i>root</i>
+     *  can take the prefix <i>prefix</i>. */
+    private static boolean isLegalPrefixRootCombo(String prefix, String root) {
+        // This will be decomposed enough.  If you can decompose it,
+        // then it doesn't take a prefix!
+        if (!TibetanMachineWeb.isKnownHashKey(root)) {
+            root = root.replace('+', '-');
+            if (!TibetanMachineWeb.isKnownHashKey(root)) {
+                throw new Error("root is, now, " + root); // FIXME: make this an assertion
+            }
+        }
+        String ru = TibetanMachineWeb.getUnicodeForWylieForGlyph(root);
+
+        // ru may be for (head, root, sub), (head, root), (root), or
+        // (root, sub).  Try all possibilities that are possible with
+        // a String of length ru.  If there's a wa-zur, then we say
+        // (FIXME: do we say correctly?) that a stack with wa-zur can
+        // take a prefix if and only if the stack without can take a
+        // prefix.
+
+        if (ru == null) throw new Error("how? root is " + root); // FIXME: make this an assertion
+        int rl = ru.length();
+        if (ru.charAt(rl - 1) == UnicodeConstants.EWSUB_wa_zur)
+            --rl; // forget about wa-zur: see above.
+        if (rl == 2) {
+            char ch0 = ru.charAt(0);
+            char ch1 = UnicodeUtils.getNominalRepresentationOfSubscribedConsonant(ru.charAt(1));
+
+            // (head, root) and (root, sub) are possibilities.
+            if (ACHUNG.equals(prefix)) {
+                return LegalTshegBar.takesAchungPrefix(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesAchungPrefix(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("b".equals(prefix)) {
+                return LegalTshegBar.takesBao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesBao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("m".equals(prefix)) {
+                return LegalTshegBar.takesMao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesMao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("g".equals(prefix)) {
+                return LegalTshegBar.takesGao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesGao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("d".equals(prefix)) {
+                return LegalTshegBar.takesDao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesDao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else {
+                throw new IllegalArgumentException("prefix is " + prefix);
+            }
+        } else if (rl == 1) {
+            char ch0 = ru.charAt(0);
+            // (root) is the only choice.
+            if (ACHUNG.equals(prefix)) {
+                return LegalTshegBar.takesAchungPrefix(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("b".equals(prefix)) {
+                return LegalTshegBar.takesBao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("m".equals(prefix)) {
+                return LegalTshegBar.takesMao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("g".equals(prefix)) {
+                return LegalTshegBar.takesGao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("d".equals(prefix)) {
+                return LegalTshegBar.takesDao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else {
+                throw new IllegalArgumentException("prefix is " + prefix);
+            }
+        } else if (rl == 3) {
+            char ch0 = ru.charAt(0);
+            char ch1 = UnicodeUtils.getNominalRepresentationOfSubscribedConsonant(ru.charAt(1));
+            char ch2 = UnicodeUtils.getNominalRepresentationOfSubscribedConsonant(ru.charAt(2));
+            // (head, root, sub) is the only choice.
+            if (ACHUNG.equals(prefix)) {
+                return LegalTshegBar.takesAchungPrefix(ch0, ch1, ch2);
+            } else if ("b".equals(prefix)) {
+                return LegalTshegBar.takesBao(ch0, ch1, ch2);
+            } else if ("m".equals(prefix)) {
+                return LegalTshegBar.takesMao(ch0, ch1, ch2);
+            } else if ("g".equals(prefix)) {
+                return LegalTshegBar.takesGao(ch0, ch1, ch2);
+            } else if ("d".equals(prefix)) {
+                return LegalTshegBar.takesDao(ch0, ch1, ch2);
+            } else {
+                throw new IllegalArgumentException("prefix is " + prefix);
+            }
+        } else {
+            return false;
+        }
+    }
 }
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@ -178,14 +178,19 @@ public class TibetanMachineWeb implements THDLWylieConstants {



-    // NOTE WELL: if you delete from consonants, numbers, vowels, or
-    // others, you'll change the way Jskad's Extended Wylie keyboard
-    // works, yes, but you'll also change TMW->Wylie.
+    // NOTE WELL: if you delete from tibetanConsonants,
+    // otherConsonants, numbers, vowels, or others, you'll change the
+    // way Jskad's Extended Wylie keyboard works, yes, but you'll also
+    // change TMW->Wylie.

-    /** comma-delimited list of supported consonants (Tibetan and
-        Tibetanized Sanskrit): */
-	private static final String consonants
-        = "k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N,Sh,v,f,Dz";
+    /** comma-delimited list of supported Tibetan consonants: */
+	private static final String tibetanConsonants
+        = "k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a";
+
+    /** comma-delimited list of supported non-Tibetan consonants, such
+     *  as Sanskrit consonants: */
+	private static final String otherConsonants // va and fa are treated pretty-much like Sanskrit.
+        = "T,Th,D,N,Sh,v,f,Dz";

    /** comma-delimited list of supported numbers (superscribed,
        subscribed, normal, half-numerals): */
@ -371,7 +376,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
        charSet = new HashSet();

        tibSet = new HashSet();
-		sTok = new StringTokenizer(consonants, ",");
+		sTok = new StringTokenizer(tibetanConsonants, ",");
 		while (sTok.hasMoreTokens()) {
            String ntk;
 			charSet.add(ntk = sTok.nextToken());
@ -379,6 +384,15 @@ public class TibetanMachineWeb implements THDLWylieConstants {
            validInputSequences.put(ntk, anyOldObjectWillDo);
        }

+        sanskritStackSet = new HashSet();
+		sTok = new StringTokenizer(otherConsonants, ",");
+		while (sTok.hasMoreTokens()) {
+            String ntk;
+			charSet.add(ntk = sTok.nextToken());
+            sanskritStackSet.add(ntk);
+            validInputSequences.put(ntk, anyOldObjectWillDo);
+        }
+
        numberSet = new HashSet();
 		sTok = new StringTokenizer(numbers, ",");
 		while (sTok.hasMoreTokens()) {
@ -386,7 +400,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
            // do it in <?Input:Numbers?> so that Jskad has the same
            // TMW->Wylie conversion regardless of whether or not it
            // chooses to support inputting numbers.  Likewise for
-            // consonants, others, and vowels.
+            // tibetanConsonants, otherConsonants, others, and vowels.
            String ntk;
 			charSet.add(ntk = sTok.nextToken());
            numberSet.add(ntk);
@ -427,8 +441,6 @@ public class TibetanMachineWeb implements THDLWylieConstants {

 			boolean ignore = false;

-            sanskritStackSet = new HashSet();
-
 			while ((line = in.readLine()) != null) {
 				if (line.startsWith("<?")) { //line is command
 					if (line.equalsIgnoreCase("<?Consonants?>")) {
@ -1182,6 +1194,23 @@ public static boolean hasGlyph(String hashKey) {
 		return true;
 }

+/** Returns the Unicode correspondence for the Wylie wylie, which must
+ *  be Wylie returned by getWylieForGlyph(int, int, boolean[]).
+ *  Returns null if the Unicode correspondence is nonexistent or
+ *  unknown. */
+public static String getUnicodeForWylieForGlyph(String wylie) {
+    DuffCode dc = getGlyph(wylie);
+    return mapTMWtoUnicode(dc.getFontNum() - 1, dc.getCharNum());
+}
+
+/**
+* Returns true if and only if hashKey is a known hash key from tibwn.ini.
+*/
+public static boolean isKnownHashKey(String hashKey) {
+	DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
+    return (null != dc);
+}
+
 /**
 * Gets a glyph for this hash key. Hash keys are not identical to Extended
 * Wylie. The hash key for a Tibetan stack separates the members of the stack
@ -1193,7 +1222,7 @@ public static boolean hasGlyph(String hashKey) {
 public static DuffCode getGlyph(String hashKey) {
 	DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
    if (null == dc)
-        throw new Error("It is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
+        throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
 	return dc[TMW];
 }

--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@ -98,13 +98,6 @@ __TILDE__~93,5~~9,91~~~~~~~none


 <?Input:Tibetan?>
-
-// 0F5F,0F39 might work, but the OpenType font's author must've had
-// Dza in mind if it does.  Note that the bottommost horizontal stroke
-// goes upward on U+0F5F and downward on U+0F5B.
-Dz~146,5~~10,42~~~~~~~none
-f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39
-v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39
 k~33,1~1,92~1,33~1,109~1,111~1,123~1,125~10,118~10,120~0F40
 kh~34,1~~1,34~1,109~1,118~1,123~1,125~10,114~10,123~0F41
 g~35,1~1,93~1,35~1,109~1,111~1,123~1,125~10,118~10,120~0F42
@ -135,11 +128,6 @@ sh~59,1~1,99~1,60~1,109~1,111~1,123~1,125~10,118~10,120~0F64
 s~60,1~~1,61~1,109~1,118~1,123~1,125~10,114~10,123~0F66
 h~61,1~1,100~1,62~1,109~1,112~1,123~1,125~10,115~10,122~0F67~1,102
 a~62,1~~1,63~1,109~1,118~1,123~1,125~10,114~10,123~0F68
-T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A
-Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B
-D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C
-N~173,1~~1,67~1,109~1,118~1,123~1,125~10,115~10,124~0F4E
-Sh~174,1~~1,68~1,109~1,118~1,123~1,125~10,115~10,124~0F65
 r-k~63,1~~1,70~1,109~1,121~1,123~1,125~10,115~10,124~f62,f90
 r-g~64,1~~1,71~1,109~1,121~1,123~1,125~10,115~10,124~f62,f92
 r-ng~65,1~~1,72~1,109~1,119~1,123~1,125~10,115~10,124~f62,f94
@ -241,6 +229,17 @@ au~237,1~~8,89~~~~~~~0F7D~~8,104
 // DLC FIXME: need -I as well

 <?Input:Sanskrit?>
+// 0F5F,0F39 might work, but the OpenType font's author must've had
+// Dza in mind if it does.  Note that the bottommost horizontal stroke
+// goes upward on U+0F5F and downward on U+0F5B.
+Dz~146,5~~10,42~~~~~~~none
+f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39
+v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39
+T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A
+Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B
+D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C
+N~173,1~~1,67~1,109~1,118~1,123~1,125~10,115~10,124~0F4E
+Sh~174,1~~1,68~1,109~1,118~1,123~1,125~10,115~10,124~0F65
 k+Sh~175,1~~1,69~1,109~1,122~1,123~1,125~10,116~10,125~0F69
 k+k~33,2~~3,33~1,109~4,120~1,123~1,125~4,106~4,113~f40,f90
 k+kh~34,2~~3,34~1,109~4,120~1,123~1,125~4,106~4,113~f40,f91
--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
@ -1266,7 +1266,7 @@ public final class LegalTshegBar
     *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
     *  nominal representation} of the subjoined letter, or EW_ABSENT
     *  if not present */
-    static boolean takesGao(char head, char root, char sub) {
+    public static boolean takesGao(char head, char root, char sub) {
        if (EW_ABSENT == head) {
            if (EW_ABSENT == sub) {
                return (EWC_ca == root
@ -1298,7 +1298,7 @@ public final class LegalTshegBar
     *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
     *  nominal representation} of the subjoined letter, or EW_ABSENT
     *  if not present */
-    static boolean takesDao(char head, char root, char sub) {
+    public static boolean takesDao(char head, char root, char sub) {
        if (EW_ABSENT == head) {
            if (EW_ABSENT == sub) {
                return (EWC_ka == root
@ -1312,6 +1312,7 @@ public final class LegalTshegBar
                        || (EWC_pa == root && EWC_ya == sub)
                        || (EWC_ba == root && EWC_ya == sub)
                        || (EWC_ma == root && EWC_ya == sub)
+                        || (EWC_ka == root && EWC_ya == sub) // dkyil, for example

                        || (EWC_ka == root && EWC_ra == sub)
                        || (EWC_ga == root && EWC_ra == sub)
@ -1336,7 +1337,7 @@ public final class LegalTshegBar
     *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
     *  nominal representation} of the subjoined letter, or EW_ABSENT
     *  if not present */
-    static boolean takesAchungPrefix(char head, char root, char sub) {
+    public static boolean takesAchungPrefix(char head, char root, char sub) {
        if (EW_ABSENT == head) {
            if (EW_ABSENT == sub) {
                return (EWC_ga == root
@ -1379,7 +1380,7 @@ public final class LegalTshegBar
     *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
     *  nominal representation} of the subjoined letter, or EW_ABSENT
     *  if not present */
-    static boolean takesMao(char head, char root, char sub) {
+    public static boolean takesMao(char head, char root, char sub) {
        if (EW_ABSENT == head) {
            if (EW_ABSENT == sub) {
                return (EWC_kha == root
@ -1418,11 +1419,12 @@ public final class LegalTshegBar
     *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
     *  nominal representation} of the subjoined letter, or EW_ABSENT
     *  if not present */
-    static boolean takesBao(char head, char root, char sub) {
+    public static boolean takesBao(char head, char root, char sub) {
        // DLC ask Ten-lo la about Wazur.
        if (EW_ABSENT == head) {
            if (EW_ABSENT == sub) {
                return (EWC_ka == root
+                        || EWC_sa == root // bsams, for example
                        || EWC_ca == root
                        || EWC_ta == root
                        || EWC_tsa == root
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@ -232,6 +232,14 @@ public class UnicodeUtils implements UnicodeConstants {
        /* DLC FIXME -- I was using 3.0 p.437-440, check 3.2. */
    }

+    /** If ch is in one of the ranges U+0F90-U+0F97, U+0F99-U+0FB9,
+     *  then this returns the same consonant in the range
+     *  U+0F40-U+0F69.  If ch is not in one of those ranges, this returns
+     *  garbage. */
+    public static char getNominalRepresentationOfSubscribedConsonant(char ch) {
+        return (char)((int)ch-(((int)'\u0F90') - ((int)'\u0F40')));
+    }
+
    /** Returns true iff ch corresponds to the Tibetan letter ra.
        Several Unicode codepoints correspond to the Tibetan letter ra
        (in its subscribed form or otherwise).  Oftentimes,
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@ -58,28 +58,46 @@ public class ACIPConverter {
        ArrayList al = ACIPTshegBarScanner.scanFile(args[1], errors, strict, maxErrors - 1);

        if (null == al) {
-            System.err.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
+            System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
            System.err.println("Tibetan or English input?");
            System.err.println("");
-            System.err.println("First " + maxErrors + " errors scanning ACIP input file: ");
-            System.err.println(errors);
-            System.err.println("Exiting with " + maxErrors + " or more errors; please fix input file and try again.");
+            if (false) {
+                // Nobody wants to see this.  FIXME: maybe somebody; have an option.
+                System.err.println("First " + maxErrors + " lexical errors scanning ACIP input file: ");
+                System.err.println(errors);
+            }
+            System.err.println("Exiting with " + maxErrors + " or more lexical errors; please fix input file and try again.");
            System.exit(1);
        }
+        final boolean abortUponScanningError = false; // DLC MAKE ME CONFIGURABLE
+        // DLC NOW: BAo isn't converting.
        if (errors.length() > 0) {
            System.err.println("Errors scanning ACIP input file: ");
            System.err.println(errors);
-            System.err.println("Exiting; please fix input file and try again.");
-            System.exit(1);
+            if (abortUponScanningError) {
+                System.err.println("Exiting; please fix input file and try again.");
+                System.exit(1);
+            }
        }

-        convertToUnicode(al, System.out, errors);
+        StringBuffer warnings = new StringBuffer();
+        boolean putWarningsInOutput = true; // DLC make me configurable.
+        convertToUnicode(al, System.out, errors, warnings,
+                         putWarningsInOutput);
        if (errors.length() > 0) {
            System.err.println("Errors converting ACIP input file: ");
            System.err.println(errors);
+            System.err.println("The output contains these errors.");
            System.err.println("Exiting; please fix input file and try again.");
            System.exit(2);
        }
+        if (warnings.length() > 0) {
+            System.err.println("Warnings converting ACIP input file: ");
+            System.err.println(warnings);
+            if (putWarningsInOutput)
+                System.err.println("The output contains these warnings.");
+            System.exit(2);
+        }
        if (verbose) System.err.println("Converted " + args[1] + " perfectly.");
        System.exit(0);
    }
@ -96,19 +114,30 @@ public class ACIPConverter {
    {
        throw new Error("DLC UNIMPLEMENTED");
    }
+    // DLC FIXME: sometimes { } is \u0F0B, and sometimes it is a
+    // space.  Treat it as a tsheg only when it appears after a
+    // syllable or another tsheg.

    /** Returns UTF-8 encoded Unicode.  A bit indirect, so use this
     *  for testing only if performance is a concern.  If errors occur
     *  in scanning the ACIP or in converting a tsheg bar, then they
-     *  are appended to errors if errors is non-null.  Returns the
+     *  are appended to errors if errors is non-null, as well as
+     *  written to the result.  If warnings occur in scanning the ACIP
+     *  or in converting a tsheg bar, then they are appended to
+     *  warnings if warnings is non-null, and they are written to the
+     *  result if writeWarningsToResult is true.  Returns the
     *  conversion upon perfect success, null if errors occurred.
     */
    public static String convertToUnicode(String acip,
-                                          StringBuffer errors) {
+                                          StringBuffer errors,
+                                          StringBuffer warnings,
+                                          boolean writeWarningsToResult) {
        ByteArrayOutputStream sw = new ByteArrayOutputStream();
        ArrayList al = ACIPTshegBarScanner.scan(acip, errors, true /* DLC FIXME */, -1);
        try {
-            if (null != al && convertToUnicode(al, sw, errors)) {
+            if (null != al
+                && convertToUnicode(al, sw, errors,
+                                    warnings, writeWarningsToResult)) {
                return sw.toString("UTF-8");
            } else {
                System.out.println("DLC al is " + al + " and convertToUnicode returned null.");
@ -119,15 +148,25 @@ public class ACIPConverter {
        }
    }

-    /** Writes Unicode to out.  If errors occur in converting a
-     *  tsheg bar, then they are appended to errors if errors is
-     *  non-null.  Returns true upon perfect success, false if errors
-     *  occurred.
+    /** Writes Unicode to out.  If errors occur in converting a tsheg
+     *  bar, then they are appended to errors if errors is non-null.
+     *  Furthermore, errors are written to out.  If writeWarningsToOut
+     *  is true, then warnings also will be written to out.  Returns
+     *  true upon perfect success, false if errors occurred.
+     *  @param scan result of ACIPTshegBarScanner.scan(..)
+     *  @param out stream to which to write converted text
+     *  @param errors if non-null, all error messages are appended to this
+     *  @param warnings if non-null, all warning messages are appended
+     *  to this
+     *  @param writeWarningsToOut if true, then all warning messages
+     *  are written to out in the appropriate places
     *  @throws IOException if we cannot write to out
     */
    public static boolean convertToUnicode(ArrayList scan,
                                           OutputStream out,
-                                           StringBuffer errors)
+                                           StringBuffer errors,
+                                           StringBuffer warnings,
+                                           boolean writeWarningsToOut)
        throws IOException
    {
        int sz = scan.size();
@ -139,7 +178,7 @@ public class ACIPConverter {
            int stype = s.getType();
            if (stype == ACIPString.ERROR) {
                hasErrors = true;
-                writer.write("[#ERROR CONVERTING ACIP DOCUMENT: ");
+                writer.write("[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: ");
                writer.write(s.getText());
                writer.write("]");
            } else {
@ -179,6 +218,21 @@ public class ACIPConverter {
                                    if (null != errors)
                                        errors.append(errorMessage + "\n");
                                } else {
+                                    String warning
+                                        = pt.getWarning(false, // DLC: make me configurable
+                                                        pl,
+                                                        s.getText());
+                                    if (null != warning) {
+                                        if (writeWarningsToOut) {
+                                            writer.write("[#WARNING CONVERTING ACIP DOCUMENT: ");
+                                            writer.write(warning);
+                                            writer.write("]");
+                                        }
+                                        if (null != warnings) {
+                                            warnings.append(warning);
+                                            warnings.append('\n');
+                                        }
+                                    }
                                    unicode = sl.getUnicode();
                                    if (null == unicode) throw new Error("DLC: HOW?");
                                }
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@ -133,16 +133,18 @@ public class ACIPTshegBarScanner {
        Stack bracketTypeStack = new Stack();
        int startSlashIndex = -1;
        int startParenIndex = -1;
+        int numNewlines = 0;
        for (int i = 0; i < sl; i++) {
            if (i < startOfString) throw new Error("bad reset");
            char ch;
            ch = s.charAt(i);
+            if (ch == '\n') ++numNewlines;
            if (ACIPString.COMMENT == currentType && ch != ']') {
                if ('[' == ch) {
                    al.add(new ACIPString("Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n",
                                          ACIPString.ERROR));
                    if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n");
                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                }
@ -157,17 +159,18 @@ public class ACIPTshegBarScanner {
                        al.add(new ACIPString(s.substring(startOfString, i),
                                              currentType));
                    }
-                    al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
+                    al.add(new ACIPString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
+                                          ACIPString.ERROR));
                    if (!waitingForMatchingIllegalClose) {
                        if (null != errors) {
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found a truly unmatched close bracket, ] or }.\n");
                        }
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    }
                    waitingForMatchingIllegalClose = false;
                    if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    startOfString = i+1;
@ -249,6 +252,11 @@ public class ACIPTshegBarScanner {
                               || s.substring(i, i + "[BP]".length()).equals("{BP}"))) {
                    thingy = "[BP]";
                    currentType = ACIPString.BP;
+                } else if (i + "[BLANK PAGE]".length() <= sl
+                           && (s.substring(i, i + "[BLANK PAGE]".length()).equals("[BLANK PAGE]")
+                               || s.substring(i, i + "[BLANK PAGE]".length()).equals("{BLANK PAGE}"))) {
+                    thingy = "[BLANK PAGE]";
+                    currentType = ACIPString.BP;
                } else if (i + "[ BP ]".length() <= sl
                           && (s.substring(i, i + "[ BP ]".length()).equals("[ BP ]")
                               || s.substring(i, i + "[ BP ]".length()).equals("{ BP }"))) {
@ -414,11 +422,11 @@ public class ACIPTshegBarScanner {
                    // This is an error.  Sometimes [COMMENTS APPEAR
                    // WITHOUT # MARKS].  Though "... [" could cause
                    // this too.
-                    al.add(new ACIPString(s.substring(i, i+1),
+                    al.add(new ACIPString("Found an illegal open bracket: " + s.substring(i, i+1),
                                          ACIPString.ERROR));
                    if (waitingForMatchingIllegalClose) {
                        if (null != errors) {
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
                        }
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@ -435,7 +443,7 @@ public class ACIPTshegBarScanner {
                                inContext = inContext + "...";
                            }
                        }
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "Found an illegal open bracket (in context, this is " + inContext + ").  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    }
@ -477,7 +485,6 @@ public class ACIPTshegBarScanner {
                            if (i+numdigits+2 < sl && s.charAt(i+numdigits+2) == '.') {
                                if (!(i+numdigits+4 < sl && isNumeric(s.charAt(i+numdigits+3))
                                      && !isNumeric(s.charAt(i+numdigits+4)))) {
-                                    al.add(new ACIPString(s.substring(i, i+numdigits+3), ACIPString.ERROR));
                                    String inContext = s.substring(i, i+Math.min(sl-i, 10));
                                    if (inContext.indexOf("\r") >= 0) {
                                        inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -488,8 +495,10 @@ public class ACIPTshegBarScanner {
                                            inContext = inContext + "...";
                                        }
                                    }
+                                    al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker has a period, '.', at the end of it, which is illegal.",
+                                                          ACIPString.ERROR));
                                    if (null != errors)
-                                        errors.append("Offset " + i + ": "
+                                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                                      + "Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker has a period, '.', at the end of it, which is illegal.\n");
                                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                                    startOfString = i+numdigits+3;
@ -498,7 +507,6 @@ public class ACIPTshegBarScanner {
                                    break;
                                }
                                if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' || s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) {
-                                    al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
                                    String inContext = s.substring(i, i+Math.min(sl-i, 10));
                                    if (inContext.indexOf("\r") >= 0) {
                                        inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -509,8 +517,10 @@ public class ACIPTshegBarScanner {
                                            inContext = inContext + "...";
                                        }
                                    }
+                                    al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker is not followed by whitespace, as is expected.",
+                                                          ACIPString.ERROR));
                                    if (null != errors)
-                                        errors.append("Offset " + i + ": "
+                                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                                      + "Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker is not followed by whitespace, as is expected.\n");
                                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                                    startOfString = i+1; // DLC FIXME: skip over more?
@ -572,7 +582,9 @@ public class ACIPTshegBarScanner {
                    }
                    
                    // This case, @NNN, must come after the @NNN{AB} case.
-                    if (i+numdigits+1 < sl && s.charAt(i+numdigits+1) == ' ') {
+                    if (i+numdigits+1 < sl && (s.charAt(i+numdigits+1) == ' '
+                                               || s.charAt(i+numdigits+1) == '\n'
+                                               || s.charAt(i+numdigits+1) == '\r')) {
                        boolean allAreNumeric = true;
                        for (int k = 1; k <= numdigits; k++) {
                            if (!isNumeric(s.charAt(i+k))) {
@ -591,7 +603,6 @@ public class ACIPTshegBarScanner {
                    }
                }
                if (startOfString == i) {
-                    al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
                    String inContext = s.substring(i, i+Math.min(sl-i, 10));
                    if (inContext.indexOf("\r") >= 0) {
                        inContext = inContext.substring(0, inContext.indexOf("\r"));
@ -602,8 +613,10 @@ public class ACIPTshegBarScanner {
                            inContext = inContext + "...";
                        }
                    }
+                    al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + ").  @012B is an example of a legal folio marker.",
+                                          ACIPString.ERROR));
                    if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "Found an illegal at sign, @ (in context, this is " + inContext + ").  @012B is an example of a legal folio marker.\n");
                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    startOfString = i+1;
@ -626,9 +639,10 @@ public class ACIPTshegBarScanner {
                         * it means /NYA/.  We warn about // for this
                         * reason.  \\ causes a tsheg-bar error (DLC
                         * FIXME: verify this is so). */
-                        al.add(new ACIPString("//", ACIPString.ERROR));
+                        al.add(new ACIPString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
+                                              ACIPString.ERROR));
                        if (errors != null) {
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
                        }
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@ -661,9 +675,10 @@ public class ACIPTshegBarScanner {

                if (startParenIndex >= 0) {
                    if (ch == '(') {
-                        al.add(new ACIPString("Nesting of parentheses () is not allowed", ACIPString.ERROR));
+                        al.add(new ACIPString("Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.",
+                                              ACIPString.ERROR));
                        if (null != errors)
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.\n");
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    } else {
@ -674,9 +689,10 @@ public class ACIPTshegBarScanner {
                    currentType = ACIPString.ERROR;
                } else {
                    if (ch == ')') {
-                        al.add(new ACIPString("Unexpected closing parenthesis )", ACIPString.ERROR));
+                        al.add(new ACIPString("Unexpected closing parenthesis, ), found.",
+                                              ACIPString.ERROR));
                        if (null != errors)
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Unexpected closing parenthesis, ), found.\n");
                        if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    } else {
@ -724,10 +740,10 @@ public class ACIPTshegBarScanner {
                    al.add(new ACIPString(s.substring(i, i+1),
                                          ACIPString.TIBETAN_PUNCTUATION));
                } else {
-                    al.add(new ACIPString(s.substring(i, i+1),
+                    al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
                                          ACIPString.ERROR));
                    if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                      + "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n");
                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                }
@ -772,19 +788,24 @@ public class ACIPTshegBarScanner {
                        al.add(new ACIPString(s.substring(startOfString, i),
                                              currentType));
                    }
-                    al.add(new ACIPString(s.substring(i, i+1),
-                                          ACIPString.ERROR));
-                    if (null != errors) {
-                        if ((int)ch == 65533) {
-                            errors.append("Offset " + i + ": "
+                    if ((int)ch == 65533) {
+                        al.add(new ACIPString("Found an illegal, unprintable character.",
+                                              ACIPString.ERROR));
+                        if (null != errors)
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found an illegal, unprintable character.\n");
-                        } else if ('\\' == ch) {
-                            errors.append("Offset " + i + ": "
+                    } else if ('\\' == ch) {
+                        al.add(new ACIPString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.",
+                                              ACIPString.ERROR));
+                        if (null != errors)
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n");
-                        } else {
-                            errors.append("Offset " + i + ": "
+                    } else {
+                        al.add(new ACIPString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
+                                              ACIPString.ERROR));
+                        if (null != errors)
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                          + "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
-                        }
                    }
                    if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                    startOfString = i+1;
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@ -128,7 +128,7 @@ public class PackageTest extends TestCase {
        }

        {
-            TStackListList legalParses = pt.getUniqueParse();
+            TStackListList legalParses = pt.getUniqueParse(false);
            boolean goodness2 = (expectedLegalParses == null
                                 || expectedLegalParses.length == legalParses.size());
            for (int i = 0 ; i < legalParses.size(); i++) {
@ -139,18 +139,21 @@ public class PackageTest extends TestCase {
                                || expectedLegalParses.length < i+1
                                || n.equals(expectedLegalParses[i]));
                if (!okay || !goodness2)
-                    System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is " + expectedLegalParses[i]);
+                    System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is "
+                                       + ((i < expectedLegalParses.length)
+                                          ? expectedLegalParses[i]
+                                          : "not present"));
                assertTrue(okay);
            }
            if (!goodness2)
-                System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses.");
+                System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses for ACIP " + acip + ".");
            assertTrue(goodness2);
            TStackListList allLegalParses = pt.getLegalParses();
            TStackListList decentParses = pt.getNonIllegalParses();
            if (pt.getBestParse() == null) {
                if (legalParses.size() == 0) {
                    if (null != expectedBestParse && !"".equals(expectedBestParse)) {
-                        System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
+                        System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for ACIP {" + acip + "}");
                        assertTrue(false);
                    }
                    System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
@ -163,7 +166,7 @@ public class PackageTest extends TestCase {
                    }
                } else {
                    if (legalParses.size() > 1) {
-                        System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
+                        System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for ACIP " + acip + ": " + legalParses);
                        assertTrue(legalParses.size() == 2
                                   && (legalParses.get(0).size()
                                       == 1 + legalParses.get(1).size()));
@ -176,7 +179,7 @@ public class PackageTest extends TestCase {
                if (null != expectedBestParse) {
                    boolean good = pt.getBestParse().equals(expectedBestParse);
                    if (!good) {
-                        System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
+                        System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for ACIP {" + acip + "}");
                    }
                    assertTrue(good);
                }
@ -229,6 +232,116 @@ public class PackageTest extends TestCase {
     *  {@link TPairList#getACIPError()}, and {@link
     *  TPairList#recoverACIP()}. */
    public void testBreakACIPIntoChunks() {
+tstHelper("GASN"); // ambiguous with regard to prefix rules
+tstHelper("BARMA"); // ambiguous with regard to prefix rules
+tstHelper("MARDA"); // ambiguous with regard to prefix rules
+tstHelper("BBA"); // ambiguous with regard to prefix rules
+tstHelper("BBLUGS"); // ambiguous with regard to prefix rules
+tstHelper("BDRA"); // ambiguous with regard to prefix rules
+tstHelper("BDRAG"); // ambiguous with regard to prefix rules
+tstHelper("BDRA'I"); // ambiguous with regard to prefix rules
+tstHelper("BDRAL"); // ambiguous with regard to prefix rules
+tstHelper("BDRAN"); // ambiguous with regard to prefix rules
+tstHelper("BDRANGS"); // ambiguous with regard to prefix rules
+tstHelper("BDREN"); // ambiguous with regard to prefix rules
+tstHelper("BDRI"); // ambiguous with regard to prefix rules
+tstHelper("BDRIS"); // ambiguous with regard to prefix rules
+tstHelper("BDROL"); // ambiguous with regard to prefix rules
+tstHelper("BDRUG"); // ambiguous with regard to prefix rules
+tstHelper("BLCAG"); // ambiguous with regard to prefix rules
+tstHelper("BLCI"); // ambiguous with regard to prefix rules
+tstHelper("BLKONG"); // ambiguous with regard to prefix rules
+tstHelper("BLNGA"); // ambiguous with regard to prefix rules
+tstHelper("BLNGAG"); // ambiguous with regard to prefix rules
+tstHelper("BMA"); // ambiguous with regard to prefix rules
+tstHelper("BMYOD"); // ambiguous with regard to prefix rules
+tstHelper("BSALDA"); // ambiguous with regard to prefix rules
+tstHelper("BSAMS"); // ambiguous with regard to prefix rules
+tstHelper("BSEMS"); // ambiguous with regard to prefix rules
+tstHelper("BTSAMS"); // ambiguous with regard to prefix rules
+tstHelper("BTSIMS"); // ambiguous with regard to prefix rules
+tstHelper("DDANG"); // ambiguous with regard to prefix rules
+tstHelper("DDAR"); // ambiguous with regard to prefix rules
+tstHelper("DDRANGS"); // ambiguous with regard to prefix rules
+tstHelper("DDRUG"); // ambiguous with regard to prefix rules
+tstHelper("DNAG"); // ambiguous with regard to prefix rules
+tstHelper("DNOGS"); // ambiguous with regard to prefix rules
+tstHelper("DRBAN"); // ambiguous with regard to prefix rules
+tstHelper("DRGYU"); // ambiguous with regard to prefix rules
+tstHelper("DRTOG"); // ambiguous with regard to prefix rules
+tstHelper("DYA"); // ambiguous with regard to prefix rules
+tstHelper("DYAN"); // ambiguous with regard to prefix rules
+tstHelper("GDRA"); // ambiguous with regard to prefix rules
+tstHelper("GDRIM"); // ambiguous with regard to prefix rules
+tstHelper("GGAN"); // ambiguous with regard to prefix rules
+tstHelper("GGYUR"); // ambiguous with regard to prefix rules
+tstHelper("GLTAR"); // ambiguous with regard to prefix rules
+tstHelper("GLTUNG"); // ambiguous with regard to prefix rules
+tstHelper("GMA"); // ambiguous with regard to prefix rules
+tstHelper("GMAN"); // ambiguous with regard to prefix rules
+tstHelper("GMON"); // ambiguous with regard to prefix rules
+tstHelper("GRDEGS"); // ambiguous with regard to prefix rules
+tstHelper("GRDZU"); // ambiguous with regard to prefix rules
+tstHelper("GRGYA"); // ambiguous with regard to prefix rules
+tstHelper("GRNAGS"); // ambiguous with regard to prefix rules
+tstHelper("GRTAN"); // ambiguous with regard to prefix rules
+tstHelper("GRTOGS"); // ambiguous with regard to prefix rules
+tstHelper("GRTZO"); // ambiguous with regard to prefix rules
+tstHelper("GRTZOD"); // ambiguous with regard to prefix rules
+tstHelper("GRTZON"); // ambiguous with regard to prefix rules
+tstHelper("GSLA"); // ambiguous with regard to prefix rules
+tstHelper("GSNAD"); // ambiguous with regard to prefix rules
+tstHelper("GZLA"); // ambiguous with regard to prefix rules
+tstHelper("MBA"); // ambiguous with regard to prefix rules
+tstHelper("MBA'"); // ambiguous with regard to prefix rules
+tstHelper("MBI'I"); // ambiguous with regard to prefix rules
+tstHelper("MHA'A"); // ambiguous with regard to prefix rules
+tstHelper("MRDA"); // ambiguous with regard to prefix rules
+tstHelper("MRDO"); // ambiguous with regard to prefix rules
+tstHelper("MRDZOGS"); // ambiguous with regard to prefix rules
+tstHelper("MRGA"); // ambiguous with regard to prefix rules
+tstHelper("MRGAD"); // ambiguous with regard to prefix rules
+tstHelper("MRGAN"); // ambiguous with regard to prefix rules
+tstHelper("MRJES"); // ambiguous with regard to prefix rules
+tstHelper("MRJOD"); // ambiguous with regard to prefix rules
+tstHelper("MRTOGS"); // ambiguous with regard to prefix rules
+tstHelper("MRTOL"); // ambiguous with regard to prefix rules
+tstHelper("MRTZE'I"); // ambiguous with regard to prefix rules
+tstHelper("MRTZIGS"); // ambiguous with regard to prefix rules
+tstHelper("MSAM"); // ambiguous with regard to prefix rules
+tstHelper("MSGRIB"); // ambiguous with regard to prefix rules
+tstHelper("MSKYES"); // ambiguous with regard to prefix rules
+tstHelper("MSON"); // ambiguous with regard to prefix rules
+tstHelper("MSOS"); // ambiguous with regard to prefix rules
+tstHelper("MSTAMS"); // ambiguous with regard to prefix rules
+tstHelper("MSTAN"); // ambiguous with regard to prefix rules
+
+
+
+
+
+        // If you're not careful, you'll think GGYES is a legal
+        // Tibetan tsheg bar and parse it as {G}{G+YE}{S}.  But it's
+        // Sanskrit, really, because GA doesn't take a GA prefix.
+        // This doesn't occur in ACIP input files that I've seen, but
+        // GGYI (S1000I.INC) and GGYUR (S5275MC4.ACT) do occur.
+        tstHelper("GGYES", "{G}{G}{YE}{S}",
+                  new String[] { "{G}{G}{YE}{S}", "{G}{G+YE}{S}", "{G+G}{YE}{S}" },
+                  new String[] { },
+                  "{G+G}{YE}{S}");
+
+        tstHelper("DRUG", "{D}{RU}{G}",
+                  new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
+                  new String[] { "{D+RU}{G}" },
+                  "{D+RU}{G}");
+
+
+        tstHelper("d+H+d+HA", "{d+}{H+}{d+}{HA}",
+                  new String[] { "{d+H+d+HA}" },
+                  new String[] { "{d+H+d+HA}" });
+
+        tstHelper("Gd+H+d+HA");
+
        tstHelper("AUTPA", "{AU}{T}{PA}",
                  new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
                  new String[] { },
@ -249,7 +362,8 @@ public class PackageTest extends TestCase {
                  new String[] { "{G+R+VA}{'I}" });
        tstHelper("G-RVA'I", "{G-}{R}{VA}{'I}",
                  new String[] { "{G}{R+VA}{'I}" },
-                  new String[] { "{G}{R+VA}{'I}" });
+                  new String[] { },
+                  "{G}{R+VA}{'I}");
        tstHelper("RVA", "{R}{VA}",
                  new String[] { "{R+VA}" },
                  new String[] { "{R+VA}" });
@ -6967,8 +7081,8 @@ tstHelper("ZUR");
              "",
              "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); // DLC FIXME
        shelp("PAS... LA",
-              "Offset 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
-              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
+              "Offset 5 or maybe 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
+              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
        shelp("PAS... LA",
              "",
              true,
@ -6983,28 +7097,28 @@ tstHelper("ZUR");
        shelp("", "", "[]");
        shelp("[DD]", "");
        shelp("[",
-              "Offset 0: Found an illegal open bracket (in context, this is [).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
+              "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
        shelp("{",
-              "Offset 0: Found an illegal open bracket (in context, this is {).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
+              "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
        shelp("DD", "");
        shelp("DD]",
-              "Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+              "Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");

-        shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
+        shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
        shelp("/NYA/", "");
        shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
        shelp("[LS][# A [[[[[COMMENT][LS]",
-              "Offset 9: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 10: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 11: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 12: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 13: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n");
+              "Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n");
        shelp("[ILLEGAL COMMENT]",
-              "Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+              "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
        shelp("(BSKYABS GRO)", ""); // DLC WHAT ARE THESE FOR?
-        shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
+        shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n");
        shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
-        shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
+        shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n");
        shelp("(BA)(PA)NYA(CA)", "");
        shelp("NYAx", "");
        shelp("NYA x", "");
@ -7033,9 +7147,9 @@ tstHelper("ZUR");
        shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
        shelp("[*NYA ", "Offset END: Unmatched open bracket found.  A correction does not terminate.\n");
        shelp("?", "", "[QUESTION:{?}]");
-        shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
+        shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n");
        shelp("[* Correction with []]",
-              "Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+              "Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");

        // DLC FIXME: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line.  Note that it's "PA", not "PA ", ending it.  Autocorrect to the latter.

@ -7051,8 +7165,8 @@ tstHelper("ZUR");
            uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
        }
        shelp("K\\,",
-              "Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
-              "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{\\}, TIBETAN_PUNCTUATION:{,}]");
+              "Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
+              "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");


        shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR%}]");
@ -7073,15 +7187,15 @@ tstHelper("ZUR");
        shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
        shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
        shelp("@19-20A",
-              "Offset 0: Found an illegal at sign, @ (in context, this is @19-20A).  @012B is an example of a legal folio marker.\n",
-              "[ERROR:{@}, TIBETAN_NON_PUNCTUATION:{19-20A}]");  // DLC FIXME: yes it occurs in the kangyur.
+              "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A).  @012B is an example of a legal folio marker.\n",
+              "[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A).  @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]");  // DLC FIXME: yes it occurs in the kangyur.
        shelp("@[7B]", "");
        shelp("@012A.3KA",
              "",
              "[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
        shelp("@012A.34",
-              "Offset 0: Found an illegal at sign, @ (in context, this is @012A.34).  This folio marker has a period, '.', at the end of it, which is illegal.\n",
-              "[ERROR:{@012A.}, TIBETAN_NON_PUNCTUATION:{34}]");
+              "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34).  This folio marker has a period, '.', at the end of it, which is illegal.\n",
+              "[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34).  This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
        shelp("@[07B]", "");
        shelp("@[00007B]", "");
        shelp("@7B", "");
@ -7097,8 +7211,8 @@ tstHelper("ZUR");
        shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT
        shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
        shelp("//NYA\\\\",
-              "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
-              "[START_SLASH:{/}, ERROR:{//}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{\\}, ERROR:{\\}]");
+              "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
+              "[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}]");

    }
    private static void uhelp(String acip) {
@ -7106,7 +7220,7 @@ tstHelper("ZUR");
    }
    private static void uhelp(String acip, String expectedUnicode) {
        StringBuffer errors = new StringBuffer();
-        String unicode = ACIPConverter.convertToUnicode(acip, errors);
+        String unicode = ACIPConverter.convertToUnicode(acip, errors, null, true);
        if (null == unicode) {
            if (null != expectedUnicode && "none" != expectedUnicode) {
                System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
@ -8729,22 +8843,22 @@ tstHelper("shKA");
 }
 /* DLC FIXME: add test cases: from R0021F.ACE: ambiguous Tibetan/Sanskrit:

- BDA'  þþþþ 
-B+DA   þþþ
-DBANG  þþþ 
-D+BA   þþþ
-DGA'  þþþþ 
-D+GA   þþþ
-DGRA   þþþ 
-D+GRA  þþþ
-DGYESþþþþþ 
-D+GYA  þþþ 
-DMAR  þþþþ
-D+MA   þþþ
-GDA'  þþþþ
-G+DA   þþþ
-GNAD  þþþþ
-G+NA   þþþ
-MNA'  þþþþ
-M+NA    þþþ 
+BDA'
+B+DA
+DBANG
+D+BA
+DGA'
+D+GA
+DGRA
+D+GRA
+DGYES
+D+GYA
+DMAR
+D+MA
+GDA'
+G+DA
+GNAD
+G+NA
+MNA'
+M+NA
 */
--- a/source/org/thdl/tib/text/ttt/TPairList.java
+++ b/source/org/thdl/tib/text/ttt/TPairList.java
@ -520,7 +520,8 @@ class TPairList {
     *  corresponds to exactly one Tibetan grapheme cluster (i.e.,
     *  stack).  Note that U+0F7F (ACIP {:}) is part of a stack, not a
     *  stack all on its own. */
-    void populateWithTGCPairs(ArrayList pl, ArrayList indexList, int index) {
+    void populateWithTGCPairs(ArrayList pl,
+                              ArrayList indexList, int index) {
        int sz = size();
        if (sz == 0) {
            return;
@ -540,8 +541,8 @@ class TPairList {
            // The last pair:
            TPair p = get(i);
            ThdlDebug.verify(!"+".equals(p.getRight()));
-            int where;
            boolean add_U0F7F = false;
+            int where;
            if (p.getRight() != null
                && (where = p.getRight().indexOf(':')) >= 0) {
                // this ':' guy is his own TGCPair.
@ -579,27 +580,21 @@ class TPairList {
            }
            TGCPair tp;
            indexList.add(new Integer(index));
-            tp = new TGCPair(lWylie.toString()
-                             + (hasNonAVowel
-                                ? ACIPRules.getWylieForACIPVowel(p.getRight())
-                                : ""),
+            tp = new TGCPair(lWylie.toString(),
+                             (hasNonAVowel
+                              ? ACIPRules.getWylieForACIPVowel(p.getRight())
+                              : ""),
                             (isNumeric
-                              ? TGCPair.OTHER
-                              : (hasNonAVowel
-                                 ? (isSanskrit
-                                    ? TGCPair.SANSKRIT_WITH_VOWEL
-                                    : (isTibetan
-                                       ? TGCPair.CONSONANTAL_WITH_VOWEL
-                                       : TGCPair.OTHER))
-                                 : (isSanskrit
-                                    ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                    : (isTibetan
-                                       ? TGCPair.CONSONANTAL_WITHOUT_VOWEL
-                                       : TGCPair.OTHER)))));
+                              ? TGCPair.TYPE_OTHER
+                              : (isSanskrit
+                                 ? TGCPair.TYPE_SANSKRIT
+                                 : (isTibetan
+                                    ? TGCPair.TYPE_TIBETAN
+                                    : TGCPair.TYPE_OTHER))));
            pl.add(tp);
            if (add_U0F7F) {
                indexList.add(new Integer(index));
-                pl.add(new TGCPair("H", TGCPair.OTHER));
+                pl.add(new TGCPair("H", null, TGCPair.TYPE_OTHER));
            }
        }
    }
--- a/source/org/thdl/tib/text/ttt/TParseTree.java
+++ b/source/org/thdl/tib/text/ttt/TParseTree.java
@ -91,7 +91,7 @@ class TParseTree {
        ParseIterator pi = getParseIterator();
        while (pi.hasNext()) {
            TStackList sl = pi.next();
-            if (sl.isLegalTshegBar().isLegal) {
+            if (sl.isLegalTshegBar(false).isLegal) {
                sll.add(sl);
            }
        }
@ -118,12 +118,12 @@ class TParseTree {
     *  a unique non-illegal parse, you get it.  If there's not a
     *  unique answer, null is returned. */
    // {TZANDRA} is not solved by this, DLC NOW.  Solve PADMA PROBLEM!
-
    // DLC by using this we can get rid of single-sanskrit-gc, eh?
    public TStackList getBestParse() {
-        TStackListList up = getUniqueParse();
+        TStackListList up = getUniqueParse(false);
        if (up.size() == 1)
            return up.get(0);
+
        up = getNonIllegalParses();
        int sz = up.size();
        if (sz == 1) {
@ -192,14 +192,17 @@ class TParseTree {
     *  legal parses if there two or more equally good parses.  By
     *  &quot;legal&quot;, we mean a sequence of stacks that is legal
     *  by the rules of Tibetan tsheg bar syntax (sometimes called
-     *  spelling). */
-    public TStackListList getUniqueParse() {
+     *  spelling).
+     *  @param noPrefixTests true if you want to pretend that every
+     *  stack can take every prefix, which is not the case in
+     *  reality */
+    public TStackListList getUniqueParse(boolean noPrefixTests) {
        TStackListList allLegalParses = new TStackListList(2); // save memory
        TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
        ParseIterator pi = getParseIterator();
        while (pi.hasNext()) {
            TStackList sl = pi.next();
-            BoolPair bpa = sl.isLegalTshegBar();
+            BoolPair bpa = sl.isLegalTshegBar(noPrefixTests);
            if (bpa.isLegal) {
                if (bpa.isLegalAndHasAVowelOnRoot)
                    legalParsesWithVowelOnRoot.add(sl);
@ -253,13 +256,23 @@ class TParseTree {
    public String getWarning(boolean paranoid,
                             TPairList pl,
                             String originalACIP) {
-        TStackListList up = getUniqueParse();
+
+        {
+            TStackList bestParse = getBestParse();
+            TStackListList noPrefixTestsUniqueParse = getUniqueParse(true);
+            if (noPrefixTestsUniqueParse.size() == 1
+                && !noPrefixTestsUniqueParse.get(0).equals(bestParse)) {
+                return "Warning: We're going with " + bestParse + ", but only because our knowledge of prefix rules says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
+            }
+        }
+
+        TStackListList up = getUniqueParse(false);
        if (null == up || up.size() != 1) {
            boolean isLastStack[] = new boolean[1];
            TStackListList nip = getNonIllegalParses();
            if (nip.size() != 1) {
                if (null == getBestParse()) {
-                    return "There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
+                    return "Warning: There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
                } else {
                    if (getBestParse().hasStackWithoutVowel(pl, isLastStack)) {
                        if (isLastStack[0]) {
@ -269,7 +282,7 @@ class TParseTree {
                        }
                    }
                    if (paranoid) {
-                        return "Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
+                        return "Warning: Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
                    }
                }
            } else {
--- a/source/org/thdl/tib/text/ttt/TStackList.java
+++ b/source/org/thdl/tib/text/ttt/TStackList.java
@ -125,15 +125,17 @@ class TStackList {
     *  Tibetan syntax (sometimes called rules of spelling).  If this
     *  is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will
     *  be true if and only if there is an explicit {A} vowel on the
-     *  root stack. */
-    public BoolPair isLegalTshegBar() {
-        // DLC handle PADMA and other Tibetanized Sanskrit fellows.  Right now we only handle single-stack guys.
+     *  root stack.
+     *  @param noPrefixTests true if you want to pretend that every
+     *  stack can take every prefix, which is not the case in
+     *  reality */
+    public BoolPair isLegalTshegBar(boolean noPrefixTests) {
+        // DLC handle PADMA and other Tibetanized Sanskrit fellows consistently.  Right now we only treat single-stack Sanskrit guys as legal.

        TTGCList tgcList = new TTGCList(this);
        StringBuffer warnings = new StringBuffer();
        String candidateType
-            = TibTextUtils.getClassificationOfTshegBar(tgcList, warnings);
-        // System.out.println("DLC: " + toString() + " has candidateType " + candidateType + " and warnings " + warnings);
+            = TibTextUtils.getClassificationOfTshegBar(tgcList, warnings, noPrefixTests);

        // preliminary answer:
        boolean isLegal = (candidateType != "invalid");