From d5ad7602304b290d11e98258785f4e8a223cab84 Mon Sep 17 00:00:00 2001
From: dchandler <dchandler>
Date: Sat, 23 Aug 2003 22:03:37 +0000
Subject: [PATCH] TMW->Wylie conversion now takes advantage of prefix rules,
 the rules that say "ya can take a ga prefix" etc.

The ACIP->Unicode converter now gives warnings (optionally, and by
default, inline).  This converter now produces output even when
lexical errors occur, but the output has errors and warnings inline.
---
 source/org/thdl/tib/input/DuffPaneTest.java   |  51 ++--
 ...O_THDL_WYLIETest1ResultConversion.expected |   2 +-
 source/org/thdl/tib/text/TGCPair.java         |  84 +++++-
 source/org/thdl/tib/text/TibTextUtils.java    | 254 ++++++++++++------
 .../org/thdl/tib/text/TibetanMachineWeb.java  |  53 +++-
 source/org/thdl/tib/text/tibwn.ini            |  23 +-
 .../thdl/tib/text/tshegbar/LegalTshegBar.java |  12 +-
 .../thdl/tib/text/tshegbar/UnicodeUtils.java  |   8 +
 .../org/thdl/tib/text/ttt/ACIPConverter.java  |  86 ++++--
 .../tib/text/ttt/ACIPTshegBarScanner.java     |  85 +++---
 source/org/thdl/tib/text/ttt/PackageTest.java | 214 +++++++++++----
 source/org/thdl/tib/text/ttt/TPairList.java   |  33 +--
 source/org/thdl/tib/text/ttt/TParseTree.java  |  31 ++-
 source/org/thdl/tib/text/ttt/TStackList.java  |  12 +-
 14 files changed, 678 insertions(+), 270 deletions(-)
diff --git a/source/org/thdl/tib/input/DuffPaneTest.java b/source/org/thdl/tib/input/DuffPaneTest.java
index 102e256..55705c9 100644
--- a/source/org/thdl/tib/input/DuffPaneTest.java
+++ b/source/org/thdl/tib/input/DuffPaneTest.java
@@ -102,19 +102,23 @@ public class DuffPaneTest extends TestCase {
         ensureKeysGiveCorrectWylie("gya");
         ensureKeysGiveCorrectWylie("g.ya");
         ensureKeysGiveCorrectWylie("bya");
-        ensureKeysGiveCorrectWylie("b.ya");
+        ensureKeysGiveCorrectWylie("b.ya", "baya");
         ensureKeysGiveCorrectWylie("mya");
-        ensureKeysGiveCorrectWylie("m.ya");
-        ensureKeysGiveCorrectWylie("'ya");
-        ensureKeysGiveCorrectWylie("'.ya", "'ya");
-        ensureKeysGiveCorrectWylie("dya");
-        ensureKeysGiveCorrectWylie("d.ya", "dya");
+        ensureKeysGiveCorrectWylie("m.ya", "maya");
+        ensureKeysGiveCorrectWylie("'ya", "'aya");
+        ensureKeysGiveCorrectWylie("'.ya", "'aya");
+        ensureKeysGiveCorrectWylie("dya",
+                                   "daya");
+        ensureKeysGiveCorrectWylie("d.ya",
+                                   "daya");
         ensureKeysGiveCorrectWylie("grwa");
-        ensureKeysGiveCorrectWylie("g.rwa");
+        ensureKeysGiveCorrectWylie("g.rwa",
+                                   "garwa");
         ensureKeysGiveCorrectWylie("gra");
         ensureKeysGiveCorrectWylie("dra");
         ensureKeysGiveCorrectWylie("drwa");
-        ensureKeysGiveCorrectWylie("d.rwa");
+        ensureKeysGiveCorrectWylie("d.rwa",
+                                   "darwa");
         ensureKeysGiveCorrectWylie("g.r", "gar");
         ensureKeysGiveCorrectWylie("d.r", "dar");
         ensureKeysGiveCorrectWylie("'.r", "'ar");
@@ -134,7 +138,7 @@ public class DuffPaneTest extends TestCase {
         ensureKeysGiveCorrectWylie("t.sa",
                                    "tas");
 
-        ensureKeysGiveCorrectWylie("d.za");
+        ensureKeysGiveCorrectWylie("d.za", "daza");
         ensureKeysGiveCorrectWylie("dza");
 
         ensureKeysGiveCorrectWylie("s.ha",
@@ -219,7 +223,7 @@ public class DuffPaneTest extends TestCase {
 
         ensureKeysGiveCorrectWylie("b.lag");
         ensureKeysGiveCorrectWylie("blg",
-                                   "blga");
+                                   "balga");
 
         ensureKeysGiveCorrectWylie("b.las",
                                    "bals");
@@ -244,21 +248,24 @@ public class DuffPaneTest extends TestCase {
                                    "bras");
         ensureKeysGiveCorrectWylie("bras");
 
-        ensureKeysGiveCorrectWylie("d.wa");
+        ensureKeysGiveCorrectWylie("d.wa",
+                                   "dawa");
         ensureKeysGiveCorrectWylie("dawa",
-                                   "d.wa");
+                                   "dawa");
         ensureKeysGiveCorrectWylie("dwa");
 
-        ensureKeysGiveCorrectWylie("g.wa");
+        ensureKeysGiveCorrectWylie("g.wa",
+                                   "gawa");
         ensureKeysGiveCorrectWylie("gawa",
-                                   "g.wa");
+                                   "gawa");
         ensureKeysGiveCorrectWylie("gwa");
 
         ensureKeysGiveCorrectWylie("'.wa",
-                                   "'wa");
+                                   "'awa");
         ensureKeysGiveCorrectWylie("'awa",
-                                   "'wa");
-        ensureKeysGiveCorrectWylie("'wa");
+                                   "'awa");
+        ensureKeysGiveCorrectWylie("'wa",
+                                   "'awa");
 
         ensureKeysGiveCorrectWylie("gyg",
                                    "g.yag");
@@ -282,7 +289,8 @@ public class DuffPaneTest extends TestCase {
         ensureKeysGiveCorrectWylie("ma.a.asa",
                                    "mas");
 
-        ensureKeysGiveCorrectWylie("'ka");
+        ensureKeysGiveCorrectWylie("'ka",
+                                   "'aka");
 
         ensureKeysGiveCorrectWylie("'gas");
 
@@ -319,8 +327,9 @@ public class DuffPaneTest extends TestCase {
                                    "lamanga");
 
         ensureKeysGiveCorrectWylie("b.m.ng",
-                                   "bmang");
-        ensureKeysGiveCorrectWylie("bmang");
+                                   "bamanga");
+        ensureKeysGiveCorrectWylie("bmang",
+                                   "bamanga");
 
         ensureKeysGiveCorrectWylie("gdams");
         ensureKeysGiveCorrectWylie("g.d.m.s.",
@@ -372,7 +381,7 @@ public class DuffPaneTest extends TestCase {
         ensureKeysGiveCorrectWylie("fivikikhigingicichijinyitithidinipiphibimitsitshidziwizhizi'iyirilishisihiTiThiDiNiShi");
 
         ensureKeysGiveCorrectWylie("don't touch my coffee/that makes me very angry/supersize my drink",
-                                   "dona'ata tocha mya cofafe/thata mkes me veraya angaraya/superasize mya drinaka");
+                                   "dona'ata tocha mya cofafe/thata makesa me veraya angaraya/superasize mya drinaka");
 
     }
 }
diff --git a/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected
index bdcd796..be3a254 100644
--- a/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected
+++ b/source/org/thdl/tib/input/TMW_RTF_TO_THDL_WYLIETest1ResultConversion.expected
@@ -28,7 +28,7 @@ zur mig nyag phran tsam gyis dge ba'i gzugs can 'dus ma byas//\par
 \par
 yid 'ong bzhin ras zla gzhon 'khor lo gnyis skyes la//\par
 'khrul ba ster yang 'phyang mo sel byed mgo skyes kyi//\par
-bai DUr mthing kha'i lan bu rab 'phyang dbyangs can ma//\par
+bai DUra mthing kha'i lan bu rab 'phyang dbyangs can ma//\par
 smra ba'i dbang phyug ngag gi rgyal po nyer grub mdzod//\par
 \par
 gangs can lha lam yangs pa'i khyon 'dir rgyal ba'i bstan pa bcu gnyis bdag po'i gur khang mchog/\par
diff --git a/source/org/thdl/tib/text/TGCPair.java b/source/org/thdl/tib/text/TGCPair.java
index d681cbd..9049b98 100644
--- a/source/org/thdl/tib/text/TGCPair.java
+++ b/source/org/thdl/tib/text/TGCPair.java
@@ -25,7 +25,7 @@ package org.thdl.tib.text;
     context-insensitive THDL Extended Wylie representation.  NOTE
     WELL: this is not a real grapheme cluster; I'm misusing the term
     (FIXME).  It's actually whole or part of one.  It's part of one
-    when this is a vowel or U+0F7F alone.
+    when this is U+0F7F alone.
 
     @author David Chandler */
 public class TGCPair {
@@ -37,14 +37,84 @@ public class TGCPair {
     public static final int SANSKRIT_WITHOUT_VOWEL = 5;
     public static final int SANSKRIT_WITH_VOWEL = 6;
 
-    public String wylie;
-    public int classification;
-    public TGCPair(String wylie, int classification) {
-        this.wylie = wylie;
-        this.classification = classification;
+    public static final int TYPE_OTHER = 31;
+    public static final int TYPE_SANSKRIT = 32;
+    public static final int TYPE_TIBETAN = 33;
+
+    // Sanskrit or Tibetan consonant, or number, or oddball:
+    private String consonantWylie;
+    private String vowelWylie;
+    public String getConsonantWylie() {
+        return consonantWylie;
     }
+    public String getVowelWylie() {
+        return vowelWylie;
+    }
+    /** Kludge: stores x as the consonant Wylie and clears the vowel Wylie. */
+    public void setWylie(String x) {
+        consonantWylie = x;
+        vowelWylie = null;
+    }
+    public String getWylie() {
+        StringBuffer b = new StringBuffer();
+        if (consonantWylie != null) {
+            // we may have {p-y}, but the user wants to see {py}.
+            for (int i = 0; i < consonantWylie.length(); i++) {
+                char ch = consonantWylie.charAt(i);
+                if ('-' != ch)
+                    b.append(ch);
+            }
+        }
+        if (vowelWylie != null)
+            b.append(vowelWylie);
+        return b.toString();
+    }
+    public int classification;
+    /** Constructs a new TGCPair with (Tibetan or Sanskrit) consonant
+     *  consonantWylie and vowel vowelWylie.  Use
+     *  classification==TYPE_OTHER for numbers, lone vowels, marks,
+     *  etc.  Use classification==TYPE_TIBETAN for Tibetan (not
+     *  Tibetanized Sanskrit) and classification==TYPE_SANSKRIT for
+     *  Tibetanized Sanskrit. */
+    public TGCPair(String consonantWylie, String vowelWylie, int classification) {
+        if ("".equals(vowelWylie))
+            vowelWylie = null;
+        // Technically, we don't need the following check, but it's
+        // nice for consistency's sake.
+        if ("".equals(consonantWylie))
+            consonantWylie = null;
+
+        // DLC FIXME: for speed, make these assertions:
+        if (classification != TYPE_OTHER
+            && classification != TYPE_TIBETAN
+            && classification != TYPE_SANSKRIT) {
+            throw new IllegalArgumentException("Bad classification " + classification + ".");
+        }
+        int realClassification = -37;
+        if (vowelWylie == null && classification == TYPE_TIBETAN)
+            realClassification = CONSONANTAL_WITHOUT_VOWEL;
+        if (vowelWylie != null && classification == TYPE_TIBETAN)
+            realClassification = CONSONANTAL_WITH_VOWEL;
+        if (vowelWylie == null && classification == TYPE_SANSKRIT)
+            realClassification = SANSKRIT_WITHOUT_VOWEL;
+        if (vowelWylie != null && classification == TYPE_SANSKRIT)
+            realClassification = SANSKRIT_WITH_VOWEL;
+        if (consonantWylie == null) {
+            if (classification != TYPE_OTHER)
+                throw new IllegalArgumentException("That's the very definition of a lone vowel.");
+            realClassification = LONE_VOWEL;
+        } else {
+            if (classification == TYPE_OTHER)
+                realClassification = OTHER;
+        }
+
+        this.consonantWylie = consonantWylie;
+        this.vowelWylie = vowelWylie;
+        this.classification = realClassification;
+    }
+
     public String toString() {
-        return "<TGCPair wylie=" + wylie + " classification="
+        return "<TGCPair wylie=" + getWylie() + " classification="
             + classification + "/>";
     }
 }
diff --git a/source/org/thdl/tib/text/TibTextUtils.java b/source/org/thdl/tib/text/TibTextUtils.java
index f42695a..7b5e418 100644
--- a/source/org/thdl/tib/text/TibTextUtils.java
+++ b/source/org/thdl/tib/text/TibTextUtils.java
@@ -25,6 +25,9 @@ import javax.swing.text.rtf.RTFEditorKit;
 import java.io.*;
 
 import org.thdl.util.ThdlDebug;
+import org.thdl.tib.text.tshegbar.LegalTshegBar;
+import org.thdl.tib.text.tshegbar.UnicodeConstants;
+import org.thdl.tib.text.tshegbar.UnicodeUtils;
 
 /**
 * Provides methods for converting back and forth between Extended
@@ -846,86 +849,64 @@ public class TibTextUtils implements THDLWylieConstants {
         // sz is an overestimate (speeds us up, wastes some memory).
         TMWGCList gcs = new TMWGCList(sz);
 
-        StringBuffer buildingUpGc = new StringBuffer();
+        StringBuffer buildingUpVowel = new StringBuffer(); // for {cui}, we append to this guy twice.
+        String nonVowelWylie = null; // for the "c" in {cui}
+        int pairType = TGCPair.TYPE_OTHER;
 
-        boolean consonantal_with_vowel = false;
-        boolean buildingUpSanskrit = false;
         for (int i = 0; i < sz; i++) {
             DuffCode dc = (DuffCode)glyphList.get(i);
             String wylie = TibetanMachineWeb.getWylieForGlyph(dc, noSuchWylie);
-            boolean containsWylieVowel = false;
             boolean buildingUpSanskritNext = false;
             if ((buildingUpSanskritNext
                  = TibetanMachineWeb.isWylieSanskritConsonantStack(wylie))
                 || TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)) {
-                if (buildingUpGc.length() > 0) {
-                    gcs.add(new TGCPair(buildingUpGc.toString(),
-                                        consonantal_with_vowel
-                                        ? (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITH_VOWEL
-                                           : TGCPair.CONSONANTAL_WITH_VOWEL)
-                                        : (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                           : TGCPair.CONSONANTAL_WITHOUT_VOWEL)));
-                    buildingUpGc.delete(0, buildingUpGc.length());
+                if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
+                    gcs.add(new TGCPair(nonVowelWylie,
+                                        buildingUpVowel.toString(),
+                                        pairType));
+                    buildingUpVowel.delete(0, buildingUpVowel.length());
                 }
-                buildingUpGc.append(wylie);
-                consonantal_with_vowel = false;
-                buildingUpSanskrit = buildingUpSanskritNext;
-            } else if ((containsWylieVowel
-                        = TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie))
+                // We want {p-y}, not {py}.
+                nonVowelWylie
+                    = TibetanMachineWeb.getHashKeyForGlyph(dc.getFontNum(), dc.getCharNum());
+                pairType = (buildingUpSanskritNext
+                            ? TGCPair.TYPE_SANSKRIT
+                            : TGCPair.TYPE_TIBETAN);
+            } else if (TibetanMachineWeb.isWylieAdornmentAndContainsVowel(wylie)
                        || TibetanMachineWeb.isWylieAdornment(wylie)) {
-
-                if (buildingUpGc.length() > 0) {
-                    buildingUpGc.append(wylie);
-                    if (containsWylieVowel) {
-                        if (debug)
-                            System.out.println("DEBUG: with_vowel is true thanks to " + wylie);
-                        consonantal_with_vowel = true;
-                    }
-                    // do not clear; we might have {cui} or {hUM}, e.g.
-                } else {
-                    gcs.add(new TGCPair(wylie,
-                                        TGCPair.LONE_VOWEL));
-                    consonantal_with_vowel = false;
-                }
+                buildingUpVowel.append(wylie);
             } else {
                 // number or weird thing:
 
-                if (buildingUpGc.length() > 0) {
-                    gcs.add(new TGCPair(buildingUpGc.toString(),
-                                        consonantal_with_vowel
-                                        ? (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITH_VOWEL
-                                           : TGCPair.CONSONANTAL_WITH_VOWEL)
-                                        : (buildingUpSanskrit
-                                           ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                           : TGCPair.CONSONANTAL_WITHOUT_VOWEL)));
-                    buildingUpGc.delete(0, buildingUpGc.length());
+                if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
+                    gcs.add(new TGCPair(nonVowelWylie,
+                                        buildingUpVowel.toString(),
+                                        pairType));
+                    buildingUpVowel.delete(0, buildingUpVowel.length());
+                    nonVowelWylie = null;
                 }
-                gcs.add(new TGCPair(wylie, TGCPair.OTHER));
-                consonantal_with_vowel = false;
-                buildingUpSanskrit = false;
+                gcs.add(new TGCPair(wylie, null, TGCPair.TYPE_OTHER));
+                pairType = TGCPair.TYPE_OTHER;
             }
         }
-        if (buildingUpGc.length() > 0) {
-            gcs.add(new TGCPair(buildingUpGc.toString(),
-                                consonantal_with_vowel
-                                ? (buildingUpSanskrit
-                                   ? TGCPair.SANSKRIT_WITH_VOWEL
-                                   : TGCPair.CONSONANTAL_WITH_VOWEL)
-                                : (buildingUpSanskrit
-                                   ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                   : TGCPair.CONSONANTAL_WITHOUT_VOWEL)));
+        if (buildingUpVowel.length() > 0 || null != nonVowelWylie) {
+            gcs.add(new TGCPair(nonVowelWylie,
+                                buildingUpVowel.toString(),
+                                pairType));
         }
-        buildingUpGc = null;
         return gcs;
     }
 
 
+    /** Returns a string that classifies gcs as a legal Tibetan tsheg
+     *  bar, a single Sanskrit grapheme cluster
+     *  ("single-sanskrit-gc"), or invalid ("invalid").  If
+     *  noPrefixTests is true, then ggyi will be seen as a
+     *  "prefix-root", even though gya doesn't take a ga prefix. */
     public static String getClassificationOfTshegBar(TGCList gcs,
                                                      // DLC the warnings are Wylie-specific
-                                                     StringBuffer warnings) {
+                                                     StringBuffer warnings,
+                                                     boolean noPrefixTests) {
         String candidateType = null;
         // Now that we have grapheme clusters, see if they match any
         // of the "legal tsheg bars":
@@ -937,10 +918,11 @@ public class TibTextUtils implements THDLWylieConstants {
                 || TGCPair.SANSKRIT_WITH_VOWEL == cls)
                 return "single-sanskrit-gc";
         }
+        TGCPair lastPair = null;
         for (int i = 0; i < sz; i++) {
             TGCPair tp = gcs.get(i);
             int cls = tp.classification;
-            String wylie = tp.wylie;
+            String wylie = tp.getWylie();
             if (TGCPair.OTHER == cls) {
                 if (TibetanMachineWeb.isWylieNumber(wylie)) {
                     if (null == candidateType) {
@@ -977,25 +959,44 @@ public class TibTextUtils implements THDLWylieConstants {
                             // peek ahead to distinguish between ba's,
                             // ba'ala and ba'am:
                             TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                             if (isAppendageNonVowelWylie(nextwylie)) {
                                 candidateType = "maybe-appendaged-prefix/root";
                             } else {
-                                candidateType = "prefix/root-root/suffix";
+                                if (noPrefixTests
+                                    || isLegalPrefixRootCombo(lastPair.getConsonantWylie(),
+                                                              tp.getConsonantWylie()))
+                                    candidateType = "prefix/root-root/suffix";
+                                else
+                                    candidateType = "root-suffix";
                             }
                         } else if (TibetanMachineWeb.isWylieRight(wylie)) {
-                            candidateType = "prefix/root-root/suffix";
+                            if (noPrefixTests
+                                || isLegalPrefixRootCombo(lastPair.getConsonantWylie(),
+                                                          tp.getConsonantWylie()))
+                                candidateType = "prefix/root-root/suffix";
+                            else
+                                candidateType = "root-suffix";
                         } else if (TibetanMachineWeb.isWylieAchungAppendage(wylie)) {
                             candidateType = "appendaged-prefix/root";
                         } else {
-                            candidateType = "prefix-root";
+                            if (noPrefixTests
+                                || isLegalPrefixRootCombo(lastPair.getConsonantWylie(),
+                                                          tp.getConsonantWylie()))
+                                candidateType = "prefix-root";
+                            else {
+                                if (null != warnings)
+                                    warnings.append("Found what would be a prefix-root combo, but the root stack with wylie " + wylie + " does not take the prefix with wylie " + lastPair.getConsonantWylie());
+                                candidateType = "invalid";
+                                break;
+                            }
                         }
                     } else if ("root" == candidateType) {
                         if (ACHUNG.equals(wylie)) {
                             // peek ahead to distinguish between pa's,
                             // pa'ala and pa'am:
                             TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                             if (isAppendageNonVowelWylie(nextwylie)) {
                                 candidateType = "maybe-appendaged-root";
                             } else {
@@ -1016,7 +1017,7 @@ public class TibTextUtils implements THDLWylieConstants {
                             // peek ahead to distinguish between bpa's,
                             // bpa'ala and bpa'am:
                             TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                             if (isAppendageNonVowelWylie(nextwylie)) {
                                 candidateType = "maybe-appendaged-prefix-root";
                             } else {
@@ -1038,7 +1039,7 @@ public class TibTextUtils implements THDLWylieConstants {
                             // peek ahead to distinguish between
                             // gga'am and gaga'ala:
                             TGCPair nexttp = (i+1 < sz) ? gcs.get(i+1) : null;
-                            String nextwylie = (nexttp == null) ? "" : nexttp.wylie;
+                            String nextwylie = (nexttp == null) ? "" : nexttp.getWylie();
                             if (isAppendageNonVowelWylie(nextwylie)) {
                                 candidateType = "maybe-appendaged-prefix/root-root/suffix";
                             } else {
@@ -1120,7 +1121,11 @@ public class TibTextUtils implements THDLWylieConstants {
                             candidateType
                                 = candidateType.substring("maybe-".length()).intern();
                             // So that we get 'am, not 'm; 'ang, not 'ng:
-                            tp.wylie = WYLIE_aVOWEL + tp.wylie;
+
+                            // FIXME: kludge: weird place to do this.
+                            // pa'am, not pa'm is what we want, sure,
+                            // but doing this here is ugly.
+                            tp.setWylie(WYLIE_aVOWEL + tp.getWylie());
                         } else {
                             if (null != warnings)
                                 warnings.append("Found a tsheg bar that has an achung (" + ACHUNG + ") tacked on, followed by some other thing whose wylie is " + wylie + "\n");
@@ -1157,6 +1162,7 @@ public class TibTextUtils implements THDLWylieConstants {
             } else {
                 throw new Error("bad cls");
             }
+            lastPair = tp;
         }
         if (candidateType.startsWith("maybe-appendaged-")) {
             if (null != warnings)
@@ -1221,7 +1227,7 @@ public class TibTextUtils implements THDLWylieConstants {
                                          StringBuffer wylieBuffer) {
         TGCList gcs
             = breakTshegBarIntoGraphemeClusters(glyphList, noSuchWylie);
-        String candidateType = getClassificationOfTshegBar(gcs, warnings);
+        String candidateType = getClassificationOfTshegBar(gcs, warnings, false);
         int sz = gcs.size();
         if (candidateType == "invalid"
             || candidateType == "single-sanskrit-gc") {
@@ -1237,7 +1243,7 @@ public class TibTextUtils implements THDLWylieConstants {
             for (int i = 0; i < sz; i++) {
                 TGCPair tp = (TGCPair)gcs.get(i);
                 int cls = tp.classification;
-                String wylie = tp.wylie;
+                String wylie = tp.getWylie();
                 wylieBuffer.append(wylie);
                 if (TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(wylie)
                     || TibetanMachineWeb.isWylieSanskritConsonantStack(wylie)) {
@@ -1290,9 +1296,9 @@ public class TibTextUtils implements THDLWylieConstants {
                 leftover = 3;
                 /* FIXME: these constants are hard-wired here, rather
                  * than in TibetanMachineWeb, because I'm lazy. */
-                String wylie1 = ((TGCPair)gcs.get(0)).wylie;
-                String wylie2 = ((TGCPair)gcs.get(1)).wylie;
-                String wylie3 = ((TGCPair)gcs.get(2)).wylie;
+                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
+                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
+                String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
                 if ((wylie1.equals("g") && (wylie2.equals("d") || wylie2.equals("n") || wylie2.equals("s")))
                     || (wylie1.equals("d") && (wylie2.equals("g") || wylie2.equals("m")))
                     || (wylie1.equals("b") && wylie2.equals("d"))
@@ -1316,7 +1322,7 @@ public class TibTextUtils implements THDLWylieConstants {
                        || "prefix/root" == candidateType
                        || "root-suffix-postsuffix" == candidateType
                        || "root-suffix" == candidateType) {
-                String wylie1 = ((TGCPair)gcs.get(0)).wylie;
+                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
                 leftover = 1;
                 wylieBuffer.append(wylie1);
                 if (((TGCPair)gcs.get(0)).classification
@@ -1330,16 +1336,16 @@ public class TibTextUtils implements THDLWylieConstants {
                 }
                 if ("root-suffix-postsuffix" == candidateType) {
                     leftover = 3;
-                    String wylie2 = ((TGCPair)gcs.get(1)).wylie;
-                    String wylie3 = ((TGCPair)gcs.get(2)).wylie;
+                    String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
+                    String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
                     wylieBuffer.append(unambiguousPostAVowelWylie(wylie2,
                                                                   wylie3));
                 }
             } else if ("prefix-root-suffix" == candidateType
                        || "prefix-root" == candidateType
                        || "prefix-root-suffix-postsuffix" == candidateType) {
-                String wylie1 = ((TGCPair)gcs.get(0)).wylie;
-                String wylie2 = ((TGCPair)gcs.get(1)).wylie;
+                String wylie1 = ((TGCPair)gcs.get(0)).getWylie();
+                String wylie2 = ((TGCPair)gcs.get(1)).getWylie();
                 leftover = 2;
                 if (TibetanMachineWeb.isAmbiguousWylie(wylie1, wylie2))
                     wylieBuffer.append(wylie1 + WYLIE_DISAMBIGUATING_KEY + wylie2);
@@ -1357,8 +1363,8 @@ public class TibTextUtils implements THDLWylieConstants {
                 }
                 if ("prefix-root-suffix-postsuffix" == candidateType) {
                     leftover = 4;
-                    String wylie3 = ((TGCPair)gcs.get(2)).wylie;
-                    String wylie4 = ((TGCPair)gcs.get(3)).wylie;
+                    String wylie3 = ((TGCPair)gcs.get(2)).getWylie();
+                    String wylie4 = ((TGCPair)gcs.get(3)).getWylie();
                     wylieBuffer.append(unambiguousPostAVowelWylie(wylie3,
                                                                   wylie4));
                 }
@@ -1371,15 +1377,15 @@ public class TibTextUtils implements THDLWylieConstants {
             // append the wylie left over:
             for (int i = leftover; i < sz; i++) {
                 TGCPair tp = (TGCPair)gcs.get(i);
-                String wylie = tp.wylie;
+                String wylie = tp.getWylie();
                 wylieBuffer.append(wylie);
             }
         }
     }
 
 /**
-* Gets the Extended Wylie for a sequence of glyphs using Chandler's
-* experimental method.  This works as follows:
+* Gets the Extended Wylie for a sequence of glyphs.  This works as
+* follows:
 *
 * <p>We run along until we hit whitespace or punctuation.  We take
 * everything before that and we see if it's a legal Tibetan tsheg bar,
@@ -1480,4 +1486,90 @@ public class TibTextUtils implements THDLWylieConstants {
         }
         return rv;
     }
+
+    /** Returns true if and only if the stack with Wylie <i>root</i>
+     *  can take the prefix <i>prefix</i>. */
+    private static boolean isLegalPrefixRootCombo(String prefix, String root) {
+        // This will be decomposed enough.  If you can decompose it,
+        // then it doesn't take a prefix!
+        if (!TibetanMachineWeb.isKnownHashKey(root)) {
+            root = root.replace('+', '-');
+            if (!TibetanMachineWeb.isKnownHashKey(root)) {
+                throw new Error("root is, now, " + root); // FIXME: make this an assertion
+            }
+        }
+        String ru = TibetanMachineWeb.getUnicodeForWylieForGlyph(root);
+
+        // ru may be for (head, root, sub), (head, root), (root), or
+        // (root, sub).  Try every interpretation that is possible
+        // for a String of ru's length.  If there's a wa-zur, then we
+        // say (FIXME: do we say correctly?) that a stack with wa-zur
+        // can take a prefix if and only if the stack without can take
+        // a prefix.
+
+        if (ru == null) throw new Error("how? root is " + root); // FIXME: make this an assertion
+        int rl = ru.length();
+        if (ru.charAt(rl - 1) == UnicodeConstants.EWSUB_wa_zur)
+            --rl; // forget about wa-zur: see above.
+        if (rl == 2) {
+            char ch0 = ru.charAt(0);
+            char ch1 = UnicodeUtils.getNominalRepresentationOfSubscribedConsonant(ru.charAt(1));
+
+            // (head, root) and (root, sub) are possibilities.
+            if (ACHUNG.equals(prefix)) {
+                return LegalTshegBar.takesAchungPrefix(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesAchungPrefix(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("b".equals(prefix)) {
+                return LegalTshegBar.takesBao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesBao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("m".equals(prefix)) {
+                return LegalTshegBar.takesMao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesMao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("g".equals(prefix)) {
+                return LegalTshegBar.takesGao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesGao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else if ("d".equals(prefix)) {
+                return LegalTshegBar.takesDao(ch0, ch1, UnicodeConstants.EW_ABSENT)
+                    || LegalTshegBar.takesDao(UnicodeConstants.EW_ABSENT, ch0, ch1);
+            } else {
+                throw new IllegalArgumentException("prefix is " + prefix);
+            }
+        } else if (rl == 1) {
+            char ch0 = ru.charAt(0);
+            // (root) is the only choice.
+            if (ACHUNG.equals(prefix)) {
+                return LegalTshegBar.takesAchungPrefix(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("b".equals(prefix)) {
+                return LegalTshegBar.takesBao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("m".equals(prefix)) {
+                return LegalTshegBar.takesMao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("g".equals(prefix)) {
+                return LegalTshegBar.takesGao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else if ("d".equals(prefix)) {
+                return LegalTshegBar.takesDao(UnicodeConstants.EW_ABSENT, ch0, UnicodeConstants.EW_ABSENT);
+            } else {
+                throw new IllegalArgumentException("prefix is " + prefix);
+            }
+        } else if (rl == 3) {
+            char ch0 = ru.charAt(0);
+            char ch1 = UnicodeUtils.getNominalRepresentationOfSubscribedConsonant(ru.charAt(1));
+            char ch2 = UnicodeUtils.getNominalRepresentationOfSubscribedConsonant(ru.charAt(2));
+            // (head, root, sub) is the only choice.
+            if (ACHUNG.equals(prefix)) {
+                return LegalTshegBar.takesAchungPrefix(ch0, ch1, ch2);
+            } else if ("b".equals(prefix)) {
+                return LegalTshegBar.takesBao(ch0, ch1, ch2);
+            } else if ("m".equals(prefix)) {
+                return LegalTshegBar.takesMao(ch0, ch1, ch2);
+            } else if ("g".equals(prefix)) {
+                return LegalTshegBar.takesGao(ch0, ch1, ch2);
+            } else if ("d".equals(prefix)) {
+                return LegalTshegBar.takesDao(ch0, ch1, ch2);
+            } else {
+                throw new IllegalArgumentException("prefix is " + prefix);
+            }
+        } else {
+            return false;
+        }
+    }
 }
diff --git a/source/org/thdl/tib/text/TibetanMachineWeb.java b/source/org/thdl/tib/text/TibetanMachineWeb.java
index 8ee9fb2..6200473 100644
--- a/source/org/thdl/tib/text/TibetanMachineWeb.java
+++ b/source/org/thdl/tib/text/TibetanMachineWeb.java
@@ -178,14 +178,19 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 
 
 
-    // NOTE WELL: if you delete from consonants, numbers, vowels, or
-    // others, you'll change the way Jskad's Extended Wylie keyboard
-    // works, yes, but you'll also change TMW->Wylie.
+    // NOTE WELL: if you delete from tibetanConsonants,
+    // otherConsonants, numbers, vowels, or others, you'll change the
+    // way Jskad's Extended Wylie keyboard works, yes, but you'll also
+    // change TMW->Wylie.
 
-    /** comma-delimited list of supported consonants (Tibetan and
-        Tibetanized Sanskrit): */
-	private static final String consonants
-        = "k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a,T,Th,D,N,Sh,v,f,Dz";
+    /** comma-delimited list of supported Tibetan consonants: */
+	private static final String tibetanConsonants
+        = "k,kh,g,ng,c,ch,j,ny,t,th,d,n,p,ph,b,m,ts,tsh,dz,w,zh,z,',y,r,l,sh,s,h,a";
+
+    /** comma-delimited list of supported non-Tibetan consonants, such
+     *  as Sanskrit consonants: */
+	private static final String otherConsonants // va and fa are treated pretty-much like Sanskrit.
+        = "T,Th,D,N,Sh,v,f,Dz";
 
     /** comma-delimited list of supported numbers (superscribed,
         subscribed, normal, half-numerals): */
@@ -371,7 +376,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
         charSet = new HashSet();
 
         tibSet = new HashSet();
-		sTok = new StringTokenizer(consonants, ",");
+		sTok = new StringTokenizer(tibetanConsonants, ",");
 		while (sTok.hasMoreTokens()) {
             String ntk;
 			charSet.add(ntk = sTok.nextToken());
@@ -379,6 +384,15 @@ public class TibetanMachineWeb implements THDLWylieConstants {
             validInputSequences.put(ntk, anyOldObjectWillDo);
         }
 
+        sanskritStackSet = new HashSet();
+		sTok = new StringTokenizer(otherConsonants, ",");
+		while (sTok.hasMoreTokens()) {
+            String ntk;
+			charSet.add(ntk = sTok.nextToken());
+            sanskritStackSet.add(ntk);
+            validInputSequences.put(ntk, anyOldObjectWillDo);
+        }
+
         numberSet = new HashSet();
 		sTok = new StringTokenizer(numbers, ",");
 		while (sTok.hasMoreTokens()) {
@@ -386,7 +400,7 @@ public class TibetanMachineWeb implements THDLWylieConstants {
             // do it in <?Input:Numbers?> so that Jskad has the same
             // TMW->Wylie conversion regardless of whether or not it
             // chooses to support inputting numbers.  Likewise for
-            // consonants, others, and vowels.
+            // tibetanConsonants, otherConsonants, others, and vowels.
             String ntk;
 			charSet.add(ntk = sTok.nextToken());
             numberSet.add(ntk);
@@ -427,8 +441,6 @@ public class TibetanMachineWeb implements THDLWylieConstants {
 
 			boolean ignore = false;
 
-            sanskritStackSet = new HashSet();
-
 			while ((line = in.readLine()) != null) {
 				if (line.startsWith("<?")) { //line is command
 					if (line.equalsIgnoreCase("<?Consonants?>")) {
@@ -1182,6 +1194,23 @@ public static boolean hasGlyph(String hashKey) {
 		return true;
 }
 
+/** Returns the Unicode correspondence for the Wylie wylie, which must
+ *  be Wylie returned by getWylieForGlyph(int, int, boolean[]).
+ *  Returns null if the Unicode correspondence is nonexistent or
+ *  unknown.  Throws an Error if wylie is not a known hash key. */
+public static String getUnicodeForWylieForGlyph(String wylie) {
+    DuffCode dc = getGlyph(wylie);
+    return mapTMWtoUnicode(dc.getFontNum() - 1, dc.getCharNum());
+}
+
+/**
+* Returns true if and only if hashKey is a known hash key from tibwn.ini.
+*/
+public static boolean isKnownHashKey(String hashKey) {
+	DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
+    return (null != dc);
+}
+
 /**
 * Gets a glyph for this hash key. Hash keys are not identical to Extended
 * Wylie. The hash key for a Tibetan stack separates the members of the stack
@@ -1193,7 +1222,7 @@ public static boolean hasGlyph(String hashKey) {
 public static DuffCode getGlyph(String hashKey) {
 	DuffCode[] dc = (DuffCode[])tibHash.get(hashKey);
     if (null == dc)
-        throw new Error("It is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
+        throw new Error("Hash key " + hashKey + " not found; it is likely that you misconfigured tibwn.ini such that, say, M is expected (i.e., it is listed as, e.g. punctuation), but no 'M~...' line appears.");
 	return dc[TMW];
 }
 
diff --git a/source/org/thdl/tib/text/tibwn.ini b/source/org/thdl/tib/text/tibwn.ini
index 175d57c..160e3b9 100644
--- a/source/org/thdl/tib/text/tibwn.ini
+++ b/source/org/thdl/tib/text/tibwn.ini
@@ -98,13 +98,6 @@ __TILDE__~93,5~~9,91~~~~~~~none
 
 
 <?Input:Tibetan?>
-
-// 0F5F,0F39 might work, but the OpenType font's author must've had
-// Dza in mind if it does.  Note that the bottommost horizontal stroke
-// goes upward on U+0F5F and downward on U+0F5B.
-Dz~146,5~~10,42~~~~~~~none
-f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39
-v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39
 k~33,1~1,92~1,33~1,109~1,111~1,123~1,125~10,118~10,120~0F40
 kh~34,1~~1,34~1,109~1,118~1,123~1,125~10,114~10,123~0F41
 g~35,1~1,93~1,35~1,109~1,111~1,123~1,125~10,118~10,120~0F42
@@ -135,11 +128,6 @@ sh~59,1~1,99~1,60~1,109~1,111~1,123~1,125~10,118~10,120~0F64
 s~60,1~~1,61~1,109~1,118~1,123~1,125~10,114~10,123~0F66
 h~61,1~1,100~1,62~1,109~1,112~1,123~1,125~10,115~10,122~0F67~1,102
 a~62,1~~1,63~1,109~1,118~1,123~1,125~10,114~10,123~0F68
-T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A
-Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B
-D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C
-N~173,1~~1,67~1,109~1,118~1,123~1,125~10,115~10,124~0F4E
-Sh~174,1~~1,68~1,109~1,118~1,123~1,125~10,115~10,124~0F65
 r-k~63,1~~1,70~1,109~1,121~1,123~1,125~10,115~10,124~f62,f90
 r-g~64,1~~1,71~1,109~1,121~1,123~1,125~10,115~10,124~f62,f92
 r-ng~65,1~~1,72~1,109~1,119~1,123~1,125~10,115~10,124~f62,f94
@@ -241,6 +229,17 @@ au~237,1~~8,89~~~~~~~0F7D~~8,104
 // DLC FIXME: need -I as well
 
 <?Input:Sanskrit?>
+// 0F5F,0F39 might work, but the OpenType font's author must've had
+// Dza in mind if it does.  Note that the bottommost horizontal stroke
+// goes upward on U+0F5F and downward on U+0F5B.
+Dz~146,5~~10,42~~~~~~~none
+f~153,5~~10,58~1,110~1,118~1,124~1,126~10,114~10,123~0F55,0F39
+v~154,5~~10,59~1,110~1,118~1,124~1,126~10,114~10,123~0F56,f39
+T~170,1~~1,64~1,109~1,120~1,123~1,125~10,115~10,124~0F4A
+Th~171,1~~1,65~1,109~1,118~1,123~1,125~10,114~10,123~0F4B
+D~172,1~~1,66~1,109~1,120~1,123~1,125~10,115~10,124~0F4C
+N~173,1~~1,67~1,109~1,118~1,123~1,125~10,115~10,124~0F4E
+Sh~174,1~~1,68~1,109~1,118~1,123~1,125~10,115~10,124~0F65
 k+Sh~175,1~~1,69~1,109~1,122~1,123~1,125~10,116~10,125~0F69
 k+k~33,2~~3,33~1,109~4,120~1,123~1,125~4,106~4,113~f40,f90
 k+kh~34,2~~3,34~1,109~4,120~1,123~1,125~4,106~4,113~f40,f91
diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
index 5dcb0fc..1e53ad2 100644
--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
@@ -1266,7 +1266,7 @@ public final class LegalTshegBar
      *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
      *  nominal representation} of the subjoined letter, or EW_ABSENT
      *  if not present */
-    static boolean takesGao(char head, char root, char sub) {
+    public static boolean takesGao(char head, char root, char sub) {
         if (EW_ABSENT == head) {
             if (EW_ABSENT == sub) {
                 return (EWC_ca == root
@@ -1298,7 +1298,7 @@ public final class LegalTshegBar
      *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
      *  nominal representation} of the subjoined letter, or EW_ABSENT
      *  if not present */
-    static boolean takesDao(char head, char root, char sub) {
+    public static boolean takesDao(char head, char root, char sub) {
         if (EW_ABSENT == head) {
             if (EW_ABSENT == sub) {
                 return (EWC_ka == root
@@ -1312,6 +1312,7 @@ public final class LegalTshegBar
                         || (EWC_pa == root && EWC_ya == sub)
                         || (EWC_ba == root && EWC_ya == sub)
                         || (EWC_ma == root && EWC_ya == sub)
+                        || (EWC_ka == root && EWC_ya == sub) // dkyil, for example
 
                         || (EWC_ka == root && EWC_ra == sub)
                         || (EWC_ga == root && EWC_ra == sub)
@@ -1336,7 +1337,7 @@ public final class LegalTshegBar
      *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
      *  nominal representation} of the subjoined letter, or EW_ABSENT
      *  if not present */
-    static boolean takesAchungPrefix(char head, char root, char sub) {
+    public static boolean takesAchungPrefix(char head, char root, char sub) {
         if (EW_ABSENT == head) {
             if (EW_ABSENT == sub) {
                 return (EWC_ga == root
@@ -1379,7 +1380,7 @@ public final class LegalTshegBar
      *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
      *  nominal representation} of the subjoined letter, or EW_ABSENT
      *  if not present */
-    static boolean takesMao(char head, char root, char sub) {
+    public static boolean takesMao(char head, char root, char sub) {
         if (EW_ABSENT == head) {
             if (EW_ABSENT == sub) {
                 return (EWC_kha == root
@@ -1418,11 +1419,12 @@ public final class LegalTshegBar
      *  @param sub the {@link #isNominalRepresentationOfConsonant(char)
      *  nominal representation} of the subjoined letter, or EW_ABSENT
      *  if not present */
-    static boolean takesBao(char head, char root, char sub) {
+    public static boolean takesBao(char head, char root, char sub) {
         // DLC ask Ten-lo la about Wazur.
         if (EW_ABSENT == head) {
             if (EW_ABSENT == sub) {
                 return (EWC_ka == root
+                        || EWC_sa == root // bsams, for example
                         || EWC_ca == root
                         || EWC_ta == root
                         || EWC_tsa == root
diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
index 544df36..5f18e32 100644
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@@ -232,6 +232,14 @@ public class UnicodeUtils implements UnicodeConstants {
         /* DLC FIXME -- I was using 3.0 p.437-440, check 3.2. */
     }
 
+    /** If ch is in one of the ranges U+0F90-U+0F97, U+0F99-U+0FB9,
+     *  then this returns the same consonant in the range
+     *  U+0F40-U+0F69.  If ch is in neither of those ranges, this
+     *  returns garbage. */
+    public static char getNominalRepresentationOfSubscribedConsonant(char ch) {
+        return (char)((int)ch-(((int)'\u0F90') - ((int)'\u0F40')));
+    }
+
     /** Returns true iff ch corresponds to the Tibetan letter ra.
         Several Unicode codepoints correspond to the Tibetan letter ra
         (in its subscribed form or otherwise).  Oftentimes,
diff --git a/source/org/thdl/tib/text/ttt/ACIPConverter.java b/source/org/thdl/tib/text/ttt/ACIPConverter.java
index 03c40fa..d8166ba 100644
--- a/source/org/thdl/tib/text/ttt/ACIPConverter.java
+++ b/source/org/thdl/tib/text/ttt/ACIPConverter.java
@@ -58,28 +58,46 @@ public class ACIPConverter {
         ArrayList al = ACIPTshegBarScanner.scanFile(args[1], errors, strict, maxErrors - 1);
 
         if (null == al) {
-            System.err.println(maxErrors + " or more errors occurred while scanning ACIP input file; is this");
+            System.err.println(maxErrors + " or more lexical errors occurred while scanning ACIP input file; is this");
             System.err.println("Tibetan or English input?");
             System.err.println("");
-            System.err.println("First " + maxErrors + " errors scanning ACIP input file: ");
-            System.err.println(errors);
-            System.err.println("Exiting with " + maxErrors + " or more errors; please fix input file and try again.");
+            if (false) {
+                // Nobody wants to see this.  FIXME: maybe somebody; have an option.
+                System.err.println("First " + maxErrors + " lexical errors scanning ACIP input file: ");
+                System.err.println(errors);
+            }
+            System.err.println("Exiting with " + maxErrors + " or more lexical errors; please fix input file and try again.");
             System.exit(1);
         }
+        final boolean abortUponScanningError = false; // DLC MAKE ME CONFIGURABLE
+        // DLC NOW: BAo isn't converting.
         if (errors.length() > 0) {
             System.err.println("Errors scanning ACIP input file: ");
             System.err.println(errors);
-            System.err.println("Exiting; please fix input file and try again.");
-            System.exit(1);
+            if (abortUponScanningError) {
+                System.err.println("Exiting; please fix input file and try again.");
+                System.exit(1);
+            }
         }
 
-        convertToUnicode(al, System.out, errors);
+        StringBuffer warnings = new StringBuffer();
+        boolean putWarningsInOutput = true; // DLC make me configurable.
+        convertToUnicode(al, System.out, errors, warnings,
+                         putWarningsInOutput);
         if (errors.length() > 0) {
             System.err.println("Errors converting ACIP input file: ");
             System.err.println(errors);
+            System.err.println("The output contains these errors.");
             System.err.println("Exiting; please fix input file and try again.");
             System.exit(2);
         }
+        if (warnings.length() > 0) {
+            System.err.println("Warnings converting ACIP input file: ");
+            System.err.println(warnings);
+            if (putWarningsInOutput)
+                System.err.println("The output contains these warnings.");
+            System.exit(2);
+        }
         if (verbose) System.err.println("Converted " + args[1] + " perfectly.");
         System.exit(0);
     }
@@ -96,19 +114,30 @@ public class ACIPConverter {
     {
         throw new Error("DLC UNIMPLEMENTED");
     }
+    // DLC FIXME: sometimes { } is \u0F0B, and sometimes it is a
+    // space.  Treat it as a tsheg only when it appears after a
+    // syllable or another tsheg.
 
     /** Returns UTF-8 encoded Unicode.  A bit indirect, so use this
      *  for testing only if performance is a concern.  If errors occur
      *  in scanning the ACIP or in converting a tsheg bar, then they
-     *  are appended to errors if errors is non-null.  Returns the
+     *  are appended to errors if errors is non-null, as well as
+     *  written to the result.  If warnings occur in scanning the ACIP
+     *  or in converting a tsheg bar, then they are appended to
+     *  warnings if warnings is non-null, and they are written to the
+     *  result if writeWarningsToResult is true.  Returns the
      *  conversion upon perfect success, null if errors occurred.
      */
     public static String convertToUnicode(String acip,
-                                          StringBuffer errors) {
+                                          StringBuffer errors,
+                                          StringBuffer warnings,
+                                          boolean writeWarningsToResult) {
         ByteArrayOutputStream sw = new ByteArrayOutputStream();
         ArrayList al = ACIPTshegBarScanner.scan(acip, errors, true /* DLC FIXME */, -1);
         try {
-            if (null != al && convertToUnicode(al, sw, errors)) {
+            if (null != al
+                && convertToUnicode(al, sw, errors,
+                                    warnings, writeWarningsToResult)) {
                 return sw.toString("UTF-8");
             } else {
                 System.out.println("DLC al is " + al + " and convertToUnicode returned null.");
@@ -119,15 +148,25 @@ public class ACIPConverter {
         }
     }
 
-    /** Writes Unicode to out.  If errors occur in converting a
-     *  tsheg bar, then they are appended to errors if errors is
-     *  non-null.  Returns true upon perfect success, false if errors
-     *  occurred.
+    /** Writes Unicode to out.  If errors occur in converting a tsheg
+     *  bar, then they are appended to errors if errors is non-null.
+     *  Furthermore, errors are written to out.  If writeWarningsToOut
+     *  is true, then warnings also will be written to out.  Returns
+     *  true upon perfect success, false if errors occurred.
+     *  @param scan result of ACIPTshegBarScanner.scan(..)
+     *  @param out stream to which to write converted text
+     *  @param errors if non-null, all error messages are appended to this
+     *  @param warnings if non-null, all warning messages are appended
+     *  to this
+     *  @param writeWarningsToOut if true, then all warning messages
+     *  are written to out in the appropriate places
      *  @throws IOException if we cannot write to out
      */
     public static boolean convertToUnicode(ArrayList scan,
                                            OutputStream out,
-                                           StringBuffer errors)
+                                           StringBuffer errors,
+                                           StringBuffer warnings,
+                                           boolean writeWarningsToOut)
         throws IOException
     {
         int sz = scan.size();
@@ -139,7 +178,7 @@ public class ACIPConverter {
             int stype = s.getType();
             if (stype == ACIPString.ERROR) {
                 hasErrors = true;
-                writer.write("[#ERROR CONVERTING ACIP DOCUMENT: ");
+                writer.write("[#ERROR CONVERTING ACIP DOCUMENT: Lexical error: ");
                 writer.write(s.getText());
                 writer.write("]");
             } else {
@@ -179,6 +218,21 @@ public class ACIPConverter {
                                     if (null != errors)
                                         errors.append(errorMessage + "\n");
                                 } else {
+                                    String warning
+                                        = pt.getWarning(false, // DLC: make me configurable
+                                                        pl,
+                                                        s.getText());
+                                    if (null != warning) {
+                                        if (writeWarningsToOut) {
+                                            writer.write("[#WARNING CONVERTING ACIP DOCUMENT: ");
+                                            writer.write(warning);
+                                            writer.write("]");
+                                        }
+                                        if (null != warnings) {
+                                            warnings.append(warning);
+                                            warnings.append('\n');
+                                        }
+                                    }
                                     unicode = sl.getUnicode();
                                     if (null == unicode) throw new Error("DLC: HOW?");
                                 }
diff --git a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
index 2879683..bea56a6 100644
--- a/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
+++ b/source/org/thdl/tib/text/ttt/ACIPTshegBarScanner.java
@@ -133,16 +133,18 @@ public class ACIPTshegBarScanner {
         Stack bracketTypeStack = new Stack();
         int startSlashIndex = -1;
         int startParenIndex = -1;
+        int numNewlines = 0;
         for (int i = 0; i < sl; i++) {
             if (i < startOfString) throw new Error("bad reset");
             char ch;
             ch = s.charAt(i);
+            if (ch == '\n') ++numNewlines;
             if (ACIPString.COMMENT == currentType && ch != ']') {
                 if ('[' == ch) {
                     al.add(new ACIPString("Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n",
                                           ACIPString.ERROR));
                     if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                       + "Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n");
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                 }
@@ -157,17 +159,18 @@ public class ACIPTshegBarScanner {
                         al.add(new ACIPString(s.substring(startOfString, i),
                                               currentType));
                     }
-                    al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
+                    al.add(new ACIPString("Found a truly unmatched close bracket, " + s.substring(i, i+1),
+                                          ACIPString.ERROR));
                     if (!waitingForMatchingIllegalClose) {
                         if (null != errors) {
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found a truly unmatched close bracket, ] or }.\n");
                         }
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     }
                     waitingForMatchingIllegalClose = false;
                     if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                       + "Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     startOfString = i+1;
@@ -249,6 +252,11 @@ public class ACIPTshegBarScanner {
                                || s.substring(i, i + "[BP]".length()).equals("{BP}"))) {
                     thingy = "[BP]";
                     currentType = ACIPString.BP;
+                } else if (i + "[BLANK PAGE]".length() <= sl
+                           && (s.substring(i, i + "[BLANK PAGE]".length()).equals("[BLANK PAGE]")
+                               || s.substring(i, i + "[BLANK PAGE]".length()).equals("{BLANK PAGE}"))) {
+                    thingy = "[BLANK PAGE]";
+                    currentType = ACIPString.BP;
                 } else if (i + "[ BP ]".length() <= sl
                            && (s.substring(i, i + "[ BP ]".length()).equals("[ BP ]")
                                || s.substring(i, i + "[ BP ]".length()).equals("{ BP }"))) {
@@ -414,11 +422,11 @@ public class ACIPTshegBarScanner {
                     // This is an error.  Sometimes [COMMENTS APPEAR
                     // WITHOUT # MARKS].  Though "... [" could cause
                     // this too.
-                    al.add(new ACIPString(s.substring(i, i+1),
+                    al.add(new ACIPString("Found an illegal open bracket: " + s.substring(i, i+1),
                                           ACIPString.ERROR));
                     if (waitingForMatchingIllegalClose) {
                         if (null != errors) {
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found a truly unmatched open bracket, [ or {, prior to this current illegal open bracket.\n");
                         }
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@@ -435,7 +443,7 @@ public class ACIPTshegBarScanner {
                                 inContext = inContext + "...";
                             }
                         }
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                       + "Found an illegal open bracket (in context, this is " + inContext + ").  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\n");
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     }
@@ -477,7 +485,6 @@ public class ACIPTshegBarScanner {
                             if (i+numdigits+2 < sl && s.charAt(i+numdigits+2) == '.') {
                                 if (!(i+numdigits+4 < sl && isNumeric(s.charAt(i+numdigits+3))
                                       && !isNumeric(s.charAt(i+numdigits+4)))) {
-                                    al.add(new ACIPString(s.substring(i, i+numdigits+3), ACIPString.ERROR));
                                     String inContext = s.substring(i, i+Math.min(sl-i, 10));
                                     if (inContext.indexOf("\r") >= 0) {
                                         inContext = inContext.substring(0, inContext.indexOf("\r"));
@@ -488,8 +495,10 @@ public class ACIPTshegBarScanner {
                                             inContext = inContext + "...";
                                         }
                                     }
+                                    al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker has a period, '.', at the end of it, which is illegal.",
+                                                          ACIPString.ERROR));
                                     if (null != errors)
-                                        errors.append("Offset " + i + ": "
+                                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                                       + "Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker has a period, '.', at the end of it, which is illegal.\n");
                                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                                     startOfString = i+numdigits+3;
@@ -498,7 +507,6 @@ public class ACIPTshegBarScanner {
                                     break;
                                 }
                                 if (i+numdigits+4 < sl && (s.charAt(i+numdigits+4) == '.' || s.charAt(i+numdigits+4) == 'A' || s.charAt(i+numdigits+4) == 'B' || s.charAt(i+numdigits+4) == 'a' || s.charAt(i+numdigits+4) == 'b' || isNumeric(s.charAt(i+numdigits+4)))) {
-                                    al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
                                     String inContext = s.substring(i, i+Math.min(sl-i, 10));
                                     if (inContext.indexOf("\r") >= 0) {
                                         inContext = inContext.substring(0, inContext.indexOf("\r"));
@@ -509,8 +517,10 @@ public class ACIPTshegBarScanner {
                                             inContext = inContext + "...";
                                         }
                                     }
+                                    al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker is not followed by whitespace, as is expected.",
+                                                          ACIPString.ERROR));
                                     if (null != errors)
-                                        errors.append("Offset " + i + ": "
+                                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                                       + "Found an illegal at sign, @ (in context, this is " + inContext + ").  This folio marker is not followed by whitespace, as is expected.\n");
                                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                                     startOfString = i+1; // DLC FIXME: skip over more?
@@ -572,7 +582,9 @@ public class ACIPTshegBarScanner {
                     }
                     
                     // This case, @NNN, must come after the @NNN{AB} case.
-                    if (i+numdigits+1 < sl && s.charAt(i+numdigits+1) == ' ') {
+                    if (i+numdigits+1 < sl && (s.charAt(i+numdigits+1) == ' '
+                                               || s.charAt(i+numdigits+1) == '\n'
+                                               || s.charAt(i+numdigits+1) == '\r')) {
                         boolean allAreNumeric = true;
                         for (int k = 1; k <= numdigits; k++) {
                             if (!isNumeric(s.charAt(i+k))) {
@@ -591,7 +603,6 @@ public class ACIPTshegBarScanner {
                     }
                 }
                 if (startOfString == i) {
-                    al.add(new ACIPString(s.substring(i, i+1), ACIPString.ERROR));
                     String inContext = s.substring(i, i+Math.min(sl-i, 10));
                     if (inContext.indexOf("\r") >= 0) {
                         inContext = inContext.substring(0, inContext.indexOf("\r"));
@@ -602,8 +613,10 @@ public class ACIPTshegBarScanner {
                             inContext = inContext + "...";
                         }
                     }
+                    al.add(new ACIPString("Found an illegal at sign, @ (in context, this is " + inContext + ").  @012B is an example of a legal folio marker.",
+                                          ACIPString.ERROR));
                     if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                       + "Found an illegal at sign, @ (in context, this is " + inContext + ").  @012B is an example of a legal folio marker.\n");
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     startOfString = i+1;
@@ -626,9 +639,10 @@ public class ACIPTshegBarScanner {
                          * it means /NYA/.  We warn about // for this
                          * reason.  \\ causes a tsheg-bar error (DLC
                          * FIXME: verify this is so). */
-                        al.add(new ACIPString("//", ACIPString.ERROR));
+                        al.add(new ACIPString("Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.",
+                                              ACIPString.ERROR));
                         if (errors != null) {
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\n");
                         }
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
@@ -661,9 +675,10 @@ public class ACIPTshegBarScanner {
 
                 if (startParenIndex >= 0) {
                     if (ch == '(') {
-                        al.add(new ACIPString("Nesting of parentheses () is not allowed", ACIPString.ERROR));
+                        al.add(new ACIPString("Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.",
+                                              ACIPString.ERROR));
                         if (null != errors)
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.\n");
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     } else {
@@ -674,9 +689,10 @@ public class ACIPTshegBarScanner {
                     currentType = ACIPString.ERROR;
                 } else {
                     if (ch == ')') {
-                        al.add(new ACIPString("Unexpected closing parenthesis )", ACIPString.ERROR));
+                        al.add(new ACIPString("Unexpected closing parenthesis, ), found.",
+                                              ACIPString.ERROR));
                         if (null != errors)
-                            errors.append("Offset " + i + ": "
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Unexpected closing parenthesis, ), found.\n");
                         if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     } else {
@@ -724,10 +740,10 @@ public class ACIPTshegBarScanner {
                     al.add(new ACIPString(s.substring(i, i+1),
                                           ACIPString.TIBETAN_PUNCTUATION));
                 } else {
-                    al.add(new ACIPString(s.substring(i, i+1),
+                    al.add(new ACIPString("A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".",
                                           ACIPString.ERROR));
                     if (null != errors)
-                        errors.append("Offset " + i + ": "
+                        errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                       + "A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n");
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                 }
@@ -772,19 +788,24 @@ public class ACIPTshegBarScanner {
                         al.add(new ACIPString(s.substring(startOfString, i),
                                               currentType));
                     }
-                    al.add(new ACIPString(s.substring(i, i+1),
-                                          ACIPString.ERROR));
-                    if (null != errors) {
-                        if ((int)ch == 65533) {
-                            errors.append("Offset " + i + ": "
+                    if ((int)ch == 65533) {
+                        al.add(new ACIPString("Found an illegal, unprintable character.",
+                                              ACIPString.ERROR));
+                        if (null != errors)
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found an illegal, unprintable character.\n");
-                        } else if ('\\' == ch) {
-                            errors.append("Offset " + i + ": "
+                    } else if ('\\' == ch) {
+                        al.add(new ACIPString("Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.",
+                                              ACIPString.ERROR));
+                        if (null != errors)
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n");
-                        } else {
-                            errors.append("Offset " + i + ": "
+                    } else {
+                        al.add(new ACIPString("Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".",
+                                              ACIPString.ERROR));
+                        if (null != errors)
+                            errors.append("Offset " + i + " or maybe " + (i-numNewlines) + ": "
                                           + "Found an illegal character, " + ch + ", with ordinal " + (int)ch + ".\n");
-                        }
                     }
                     if (maxErrors >= 0 && ++numErrors >= maxErrors) return null;
                     startOfString = i+1;
diff --git a/source/org/thdl/tib/text/ttt/PackageTest.java b/source/org/thdl/tib/text/ttt/PackageTest.java
index b447da1..b3c8c11 100644
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
@@ -128,7 +128,7 @@ public class PackageTest extends TestCase {
         }
 
         {
-            TStackListList legalParses = pt.getUniqueParse();
+            TStackListList legalParses = pt.getUniqueParse(false);
             boolean goodness2 = (expectedLegalParses == null
                                  || expectedLegalParses.length == legalParses.size());
             for (int i = 0 ; i < legalParses.size(); i++) {
@@ -139,18 +139,21 @@ public class PackageTest extends TestCase {
                                 || expectedLegalParses.length < i+1
                                 || n.equals(expectedLegalParses[i]));
                 if (!okay || !goodness2)
-                    System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is " + expectedLegalParses[i]);
+                    System.out.println("Legal parse " + (i) + " (from zero) is " + n + " (toString2=" + n.toString2() + ") and expected is "
+                                       + ((i < expectedLegalParses.length)
+                                          ? expectedLegalParses[i]
+                                          : "not present"));
                 assertTrue(okay);
             }
             if (!goodness2)
-                System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses.");
+                System.out.println("You expected " + expectedLegalParses.length + " legal parses, but there were instead " + legalParses.size() + " legal parses for ACIP " + acip + ".");
             assertTrue(goodness2);
             TStackListList allLegalParses = pt.getLegalParses();
             TStackListList decentParses = pt.getNonIllegalParses();
             if (pt.getBestParse() == null) {
                 if (legalParses.size() == 0) {
                     if (null != expectedBestParse && !"".equals(expectedBestParse)) {
-                        System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for acip {" + acip + "}");
+                        System.out.print("Expected is that there is a best parse \"" + expectedBestParse + "\" but there is no best parse for ACIP {" + acip + "}");
                         assertTrue(false);
                     }
                     System.out.print("ACIPNoBestParseError: There is no best parse for the ACIP {" + acip + "}; ");
@@ -163,7 +166,7 @@ public class PackageTest extends TestCase {
                     }
                 } else {
                     if (legalParses.size() > 1) {
-                        System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for acip " + acip + ": " + legalParses);
+                        System.out.println("ACIPTooManyLegalParsesError: see these " + legalParses.size() + " legal parses for ACIP " + acip + ": " + legalParses);
                         assertTrue(legalParses.size() == 2
                                    && (legalParses.get(0).size()
                                        == 1 + legalParses.get(1).size()));
@@ -176,7 +179,7 @@ public class PackageTest extends TestCase {
                 if (null != expectedBestParse) {
                     boolean good = pt.getBestParse().equals(expectedBestParse);
                     if (!good) {
-                        System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for acip {" + acip + "}");
+                        System.out.print("Expected best parse is \"" + expectedBestParse + "\" but the best parse is " + pt.getBestParse() + " for ACIP {" + acip + "}");
                     }
                     assertTrue(good);
                 }
@@ -229,6 +232,116 @@ public class PackageTest extends TestCase {
      *  {@link TPairList#getACIPError()}, and {@link
      *  TPairList#recoverACIP()}. */
     public void testBreakACIPIntoChunks() {
+tstHelper("GASN"); // ambiguous with regard to prefix rules
+tstHelper("BARMA"); // ambiguous with regard to prefix rules
+tstHelper("MARDA"); // ambiguous with regard to prefix rules
+tstHelper("BBA"); // ambiguous with regard to prefix rules
+tstHelper("BBLUGS"); // ambiguous with regard to prefix rules
+tstHelper("BDRA"); // ambiguous with regard to prefix rules
+tstHelper("BDRAG"); // ambiguous with regard to prefix rules
+tstHelper("BDRA'I"); // ambiguous with regard to prefix rules
+tstHelper("BDRAL"); // ambiguous with regard to prefix rules
+tstHelper("BDRAN"); // ambiguous with regard to prefix rules
+tstHelper("BDRANGS"); // ambiguous with regard to prefix rules
+tstHelper("BDREN"); // ambiguous with regard to prefix rules
+tstHelper("BDRI"); // ambiguous with regard to prefix rules
+tstHelper("BDRIS"); // ambiguous with regard to prefix rules
+tstHelper("BDROL"); // ambiguous with regard to prefix rules
+tstHelper("BDRUG"); // ambiguous with regard to prefix rules
+tstHelper("BLCAG"); // ambiguous with regard to prefix rules
+tstHelper("BLCI"); // ambiguous with regard to prefix rules
+tstHelper("BLKONG"); // ambiguous with regard to prefix rules
+tstHelper("BLNGA"); // ambiguous with regard to prefix rules
+tstHelper("BLNGAG"); // ambiguous with regard to prefix rules
+tstHelper("BMA"); // ambiguous with regard to prefix rules
+tstHelper("BMYOD"); // ambiguous with regard to prefix rules
+tstHelper("BSALDA"); // ambiguous with regard to prefix rules
+tstHelper("BSAMS"); // ambiguous with regard to prefix rules
+tstHelper("BSEMS"); // ambiguous with regard to prefix rules
+tstHelper("BTSAMS"); // ambiguous with regard to prefix rules
+tstHelper("BTSIMS"); // ambiguous with regard to prefix rules
+tstHelper("DDANG"); // ambiguous with regard to prefix rules
+tstHelper("DDAR"); // ambiguous with regard to prefix rules
+tstHelper("DDRANGS"); // ambiguous with regard to prefix rules
+tstHelper("DDRUG"); // ambiguous with regard to prefix rules
+tstHelper("DNAG"); // ambiguous with regard to prefix rules
+tstHelper("DNOGS"); // ambiguous with regard to prefix rules
+tstHelper("DRBAN"); // ambiguous with regard to prefix rules
+tstHelper("DRGYU"); // ambiguous with regard to prefix rules
+tstHelper("DRTOG"); // ambiguous with regard to prefix rules
+tstHelper("DYA"); // ambiguous with regard to prefix rules
+tstHelper("DYAN"); // ambiguous with regard to prefix rules
+tstHelper("GDRA"); // ambiguous with regard to prefix rules
+tstHelper("GDRIM"); // ambiguous with regard to prefix rules
+tstHelper("GGAN"); // ambiguous with regard to prefix rules
+tstHelper("GGYUR"); // ambiguous with regard to prefix rules
+tstHelper("GLTAR"); // ambiguous with regard to prefix rules
+tstHelper("GLTUNG"); // ambiguous with regard to prefix rules
+tstHelper("GMA"); // ambiguous with regard to prefix rules
+tstHelper("GMAN"); // ambiguous with regard to prefix rules
+tstHelper("GMON"); // ambiguous with regard to prefix rules
+tstHelper("GRDEGS"); // ambiguous with regard to prefix rules
+tstHelper("GRDZU"); // ambiguous with regard to prefix rules
+tstHelper("GRGYA"); // ambiguous with regard to prefix rules
+tstHelper("GRNAGS"); // ambiguous with regard to prefix rules
+tstHelper("GRTAN"); // ambiguous with regard to prefix rules
+tstHelper("GRTOGS"); // ambiguous with regard to prefix rules
+tstHelper("GRTZO"); // ambiguous with regard to prefix rules
+tstHelper("GRTZOD"); // ambiguous with regard to prefix rules
+tstHelper("GRTZON"); // ambiguous with regard to prefix rules
+tstHelper("GSLA"); // ambiguous with regard to prefix rules
+tstHelper("GSNAD"); // ambiguous with regard to prefix rules
+tstHelper("GZLA"); // ambiguous with regard to prefix rules
+tstHelper("MBA"); // ambiguous with regard to prefix rules
+tstHelper("MBA'"); // ambiguous with regard to prefix rules
+tstHelper("MBI'I"); // ambiguous with regard to prefix rules
+tstHelper("MHA'A"); // ambiguous with regard to prefix rules
+tstHelper("MRDA"); // ambiguous with regard to prefix rules
+tstHelper("MRDO"); // ambiguous with regard to prefix rules
+tstHelper("MRDZOGS"); // ambiguous with regard to prefix rules
+tstHelper("MRGA"); // ambiguous with regard to prefix rules
+tstHelper("MRGAD"); // ambiguous with regard to prefix rules
+tstHelper("MRGAN"); // ambiguous with regard to prefix rules
+tstHelper("MRJES"); // ambiguous with regard to prefix rules
+tstHelper("MRJOD"); // ambiguous with regard to prefix rules
+tstHelper("MRTOGS"); // ambiguous with regard to prefix rules
+tstHelper("MRTOL"); // ambiguous with regard to prefix rules
+tstHelper("MRTZE'I"); // ambiguous with regard to prefix rules
+tstHelper("MRTZIGS"); // ambiguous with regard to prefix rules
+tstHelper("MSAM"); // ambiguous with regard to prefix rules
+tstHelper("MSGRIB"); // ambiguous with regard to prefix rules
+tstHelper("MSKYES"); // ambiguous with regard to prefix rules
+tstHelper("MSON"); // ambiguous with regard to prefix rules
+tstHelper("MSOS"); // ambiguous with regard to prefix rules
+tstHelper("MSTAMS"); // ambiguous with regard to prefix rules
+tstHelper("MSTAN"); // ambiguous with regard to prefix rules
+
+
+
+
+
+        // If you're not careful, you'll think GGYES is a legal
+        // Tibetan tsheg bar and parse it as {G}{G+YE}{S}.  But it's
+        // Sanskrit, really, because GA doesn't take a GA prefix.
+        // This doesn't occur in ACIP input files that I've seen, but
+        // GGYI (S1000I.INC) and GGYUR (S5275MC4.ACT) do occur.
+        tstHelper("GGYES", "{G}{G}{YE}{S}",
+                  new String[] { "{G}{G}{YE}{S}", "{G}{G+YE}{S}", "{G+G}{YE}{S}" },
+                  new String[] { },
+                  "{G+G}{YE}{S}");
+
+        tstHelper("DRUG", "{D}{RU}{G}",
+                  new String[] { "{D}{RU}{G}", "{D+RU}{G}" },
+                  new String[] { "{D+RU}{G}" },
+                  "{D+RU}{G}");
+
+
+        tstHelper("d+H+d+HA", "{d+}{H+}{d+}{HA}",
+                  new String[] { "{d+H+d+HA}" },
+                  new String[] { "{d+H+d+HA}" });
+
+        tstHelper("Gd+H+d+HA");
+
         tstHelper("AUTPA", "{AU}{T}{PA}",
                   new String[] { "{AU}{T}{PA}", "{AU}{T+PA}" },
                   new String[] { },
@@ -249,7 +362,8 @@ public class PackageTest extends TestCase {
                   new String[] { "{G+R+VA}{'I}" });
         tstHelper("G-RVA'I", "{G-}{R}{VA}{'I}",
                   new String[] { "{G}{R+VA}{'I}" },
-                  new String[] { "{G}{R+VA}{'I}" });
+                  new String[] { },
+                  "{G}{R+VA}{'I}");
         tstHelper("RVA", "{R}{VA}",
                   new String[] { "{R+VA}" },
                   new String[] { "{R+VA}" });
@@ -6967,8 +7081,8 @@ tstHelper("ZUR");
               "",
               "[TIBETAN_NON_PUNCTUATION:{LA}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, TIBETAN_NON_PUNCTUATION:{SGRUB}]"); // DLC FIXME
         shelp("PAS... LA",
-              "Offset 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
-              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{.}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
+              "Offset 5 or maybe 5: A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".\n",
+              "[TIBETAN_NON_PUNCTUATION:{PAS}, TIBETAN_PUNCTUATION:{.}, TIBETAN_PUNCTUATION:{.}, ERROR:{A non-breaking tsheg, '.', appeared, but not like \"...,\" or \".,\" or \".dA\" or \".DA\".}, TIBETAN_PUNCTUATION:{ }, TIBETAN_NON_PUNCTUATION:{LA}]");
         shelp("PAS... LA",
               "",
               true,
@@ -6983,28 +7097,28 @@ tstHelper("ZUR");
         shelp("", "", "[]");
         shelp("[DD]", "");
         shelp("[",
-              "Offset 0: Found an illegal open bracket (in context, this is [).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
+              "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
         shelp("{",
-              "Offset 0: Found an illegal open bracket (in context, this is {).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
+              "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is {).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset END: Truly unmatched open bracket found.\n");
         shelp("DD", "");
         shelp("DD]",
-              "Offset 2: Found a truly unmatched close bracket, ] or }.\nOffset 2: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+              "Offset 2 or maybe 2: Found a truly unmatched close bracket, ] or }.\nOffset 2 or maybe 2: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
 
-        shelp("///NYA", "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
+        shelp("///NYA", "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset END: Slashes are supposed to occur in pairs, but the input had an unmatched '/' character.\n");
         shelp("/NYA/", "");
         shelp("[?][BP][LS][DD1][DD2][DDD][DR][# (<{A COMMENT)}>]", "");
         shelp("[LS][# A [[[[[COMMENT][LS]",
-              "Offset 9: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 10: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 11: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 12: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
-              + "Offset 13: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n");
+              "Offset 9 or maybe 9: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 10 or maybe 10: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 11 or maybe 11: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 12 or maybe 12: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n"
+              + "Offset 13 or maybe 13: Found an open bracket within a [#COMMENT]-style comment.  Brackets may not appear in comments.\n");
         shelp("[ILLEGAL COMMENT]",
-              "Offset 0: Found an illegal open bracket (in context, this is [ILLEGAL C...).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+              "Offset 0 or maybe 0: Found an illegal open bracket (in context, this is [ILLEGAL C...).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 16 or maybe 16: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
         shelp("(BSKYABS GRO)", ""); // DLC WHAT ARE THESE FOR?
-        shelp("BSKYABS GRO)", "Offset 11: Unexpected closing parenthesis, ), found.\n");
+        shelp("BSKYABS GRO)", "Offset 11 or maybe 11: Unexpected closing parenthesis, ), found.\n");
         shelp("BSKYABS GRO(", "Offset END: Unmatched open parenthesis, (, found.\n");
-        shelp("((NESTAGE))", "Offset 1: Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.\nOffset 10: Unexpected closing parenthesis, ), found.\n");
+        shelp("((NESTAGE))", "Offset 1 or maybe 1: Found an illegal open parenthesis, (.  Nesting of parentheses is not allowed.\nOffset 10 or maybe 10: Unexpected closing parenthesis, ), found.\n");
         shelp("(BA)(PA)NYA(CA)", "");
         shelp("NYAx", "");
         shelp("NYA x", "");
@@ -7033,9 +7147,9 @@ tstHelper("ZUR");
         shelp("(NYA ", "Offset END: Unmatched open parenthesis, (, found.\n");
         shelp("[*NYA ", "Offset END: Unmatched open bracket found.  A correction does not terminate.\n");
         shelp("?", "", "[QUESTION:{?}]");
-        shelp("KHAN~ BAR ", "Offset 4: Found an illegal character, ~, with ordinal 126.\n");
+        shelp("KHAN~ BAR ", "Offset 4 or maybe 4: Found an illegal character, ~, with ordinal 126.\n");
         shelp("[* Correction with []]",
-              "Offset 5: Found an illegal character, r, with ordinal 114.\nOffset 6: Found an illegal character, r, with ordinal 114.\nOffset 7: Found an illegal character, e, with ordinal 101.\nOffset 8: Found an illegal character, c, with ordinal 99.\nOffset 14: Found an illegal character, w, with ordinal 119.\nOffset 19: Found an illegal open bracket (in context, this is []]).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
+              "Offset 5 or maybe 5: Found an illegal character, r, with ordinal 114.\nOffset 6 or maybe 6: Found an illegal character, r, with ordinal 114.\nOffset 7 or maybe 7: Found an illegal character, e, with ordinal 101.\nOffset 8 or maybe 8: Found an illegal character, c, with ordinal 99.\nOffset 14 or maybe 14: Found an illegal character, w, with ordinal 119.\nOffset 19 or maybe 19: Found an illegal open bracket (in context, this is []]).  Perhaps there is a [#COMMENT] written incorrectly as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], or an unmatched open bracket?\nOffset 21 or maybe 21: Found a closing bracket without a matching open bracket.  Perhaps a [#COMMENT] incorrectly written as [COMMENT], or a [*CORRECTION] written incorrectly as [CORRECTION], caused this.\n");
 
         // DLC FIXME: the line SDIG PA'I GROGS PO'I LAG TU SON PAR 'GYUR PA is followed by a blank line.  Note that it's "PA", not "PA ", ending it.  Autocorrect to the latter.
 
@@ -7051,8 +7165,8 @@ tstHelper("ZUR");
             uhelp(" 1\\ ", "\u0f0b\u0f21\u0f84\u0f0b");
         }
         shelp("K\\,",
-              "Offset 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
-              "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{\\}, TIBETAN_PUNCTUATION:{,}]");
+              "Offset 1 or maybe 1: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
+              "[TIBETAN_NON_PUNCTUATION:{K}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}, TIBETAN_PUNCTUATION:{,}]");
 
 
         shelp("MTHAR%", "", "[TIBETAN_NON_PUNCTUATION:{MTHAR%}]");
@@ -7073,15 +7187,15 @@ tstHelper("ZUR");
         shelp("@01A.3 ", "", "[FOLIO_MARKER:{@01A.3}, TIBETAN_PUNCTUATION:{ }]");
         shelp("@001 ", "", "[FOLIO_MARKER:{@001}, TIBETAN_PUNCTUATION:{ }]");
         shelp("@19-20A",
-              "Offset 0: Found an illegal at sign, @ (in context, this is @19-20A).  @012B is an example of a legal folio marker.\n",
-              "[ERROR:{@}, TIBETAN_NON_PUNCTUATION:{19-20A}]");  // DLC FIXME: yes it occurs in the kangyur.
+              "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @19-20A).  @012B is an example of a legal folio marker.\n",
+              "[ERROR:{Found an illegal at sign, @ (in context, this is @19-20A).  @012B is an example of a legal folio marker.}, TIBETAN_NON_PUNCTUATION:{19-20A}]");  // DLC FIXME: yes it occurs in the kangyur.
         shelp("@[7B]", "");
         shelp("@012A.3KA",
               "",
               "[FOLIO_MARKER:{@012A.3}, TIBETAN_NON_PUNCTUATION:{KA}]");
         shelp("@012A.34",
-              "Offset 0: Found an illegal at sign, @ (in context, this is @012A.34).  This folio marker has a period, '.', at the end of it, which is illegal.\n",
-              "[ERROR:{@012A.}, TIBETAN_NON_PUNCTUATION:{34}]");
+              "Offset 0 or maybe 0: Found an illegal at sign, @ (in context, this is @012A.34).  This folio marker has a period, '.', at the end of it, which is illegal.\n",
+              "[ERROR:{Found an illegal at sign, @ (in context, this is @012A.34).  This folio marker has a period, '.', at the end of it, which is illegal.}, TIBETAN_NON_PUNCTUATION:{34}]");
         shelp("@[07B]", "");
         shelp("@[00007B]", "");
         shelp("@7B", "");
@@ -7097,8 +7211,8 @@ tstHelper("ZUR");
         shelp("{ DD }", "", "[DD:{{ DD }}]"); // TD3790E2.ACT
         shelp("{ BP }", "", "[BP:{{ BP }}]"); // TD3790E2.ACT
         shelp("//NYA\\\\",
-              "Offset 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\nOffset 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
-              "[START_SLASH:{/}, ERROR:{//}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{\\}, ERROR:{\\}]");
+              "Offset 1 or maybe 1: Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.\nOffset 5 or maybe 5: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\nOffset 6 or maybe 6: Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.\n",
+              "[START_SLASH:{/}, ERROR:{Found //, which could be legal (the Unicode would be \\u0F3C\\u0F3D), but is likely in an illegal construct like //NYA\\\\.}, END_SLASH:{/}, TIBETAN_NON_PUNCTUATION:{NYA}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}, ERROR:{Found a Sanskrit virama, \\, but the converter currently doesn't treat these properly.  Sorry!  Please do complain to the maintainers.}]");
 
     }
     private static void uhelp(String acip) {
@@ -7106,7 +7220,7 @@ tstHelper("ZUR");
     }
     private static void uhelp(String acip, String expectedUnicode) {
         StringBuffer errors = new StringBuffer();
-        String unicode = ACIPConverter.convertToUnicode(acip, errors);
+        String unicode = ACIPConverter.convertToUnicode(acip, errors, null, true);
         if (null == unicode) {
             if (null != expectedUnicode && "none" != expectedUnicode) {
                 System.out.println("No unicode exists for " + acip + " but you expected " + org.thdl.tib.text.tshegbar.UnicodeUtils.unicodeStringToPrettyString(expectedUnicode));
@@ -8729,22 +8843,22 @@ tstHelper("shKA");
 }
 /* DLC FIXME: add test cases: from R0021F.ACE: ambiguous Tibetan/Sanskrit:
 
- BDA'  ���� 
-B+DA   ���
-DBANG  ��� 
-D+BA   ���
-DGA'  ���� 
-D+GA   ���
-DGRA   ��� 
-D+GRA  ���
-DGYES����� 
-D+GYA  ��� 
-DMAR  ����
-D+MA   ���
-GDA'  ����
-G+DA   ���
-GNAD  ����
-G+NA   ���
-MNA'  ����
-M+NA    ��� 
+BDA'
+B+DA
+DBANG
+D+BA
+DGA'
+D+GA
+DGRA
+D+GRA
+DGYES
+D+GYA
+DMAR
+D+MA
+GDA'
+G+DA
+GNAD
+G+NA
+MNA'
+M+NA
 */
diff --git a/source/org/thdl/tib/text/ttt/TPairList.java b/source/org/thdl/tib/text/ttt/TPairList.java
index 1d97639..c1ebfd5 100644
--- a/source/org/thdl/tib/text/ttt/TPairList.java
+++ b/source/org/thdl/tib/text/ttt/TPairList.java
@@ -520,7 +520,8 @@ class TPairList {
      *  corresponds to exactly one Tibetan grapheme cluster (i.e.,
      *  stack).  Note that U+0F7F (ACIP {:}) is part of a stack, not a
      *  stack all on its own. */
-    void populateWithTGCPairs(ArrayList pl, ArrayList indexList, int index) {
+    void populateWithTGCPairs(ArrayList pl,
+                              ArrayList indexList, int index) {
         int sz = size();
         if (sz == 0) {
             return;
@@ -540,8 +541,8 @@ class TPairList {
             // The last pair:
             TPair p = get(i);
             ThdlDebug.verify(!"+".equals(p.getRight()));
-            int where;
             boolean add_U0F7F = false;
+            int where;
             if (p.getRight() != null
                 && (where = p.getRight().indexOf(':')) >= 0) {
                 // this ':' guy is his own TGCPair.
@@ -579,27 +580,21 @@ class TPairList {
             }
             TGCPair tp;
             indexList.add(new Integer(index));
-            tp = new TGCPair(lWylie.toString()
-                             + (hasNonAVowel
-                                ? ACIPRules.getWylieForACIPVowel(p.getRight())
-                                : ""),
+            tp = new TGCPair(lWylie.toString(),
+                             (hasNonAVowel
+                              ? ACIPRules.getWylieForACIPVowel(p.getRight())
+                              : ""),
                              (isNumeric
-                              ? TGCPair.OTHER
-                              : (hasNonAVowel
-                                 ? (isSanskrit
-                                    ? TGCPair.SANSKRIT_WITH_VOWEL
-                                    : (isTibetan
-                                       ? TGCPair.CONSONANTAL_WITH_VOWEL
-                                       : TGCPair.OTHER))
-                                 : (isSanskrit
-                                    ? TGCPair.SANSKRIT_WITHOUT_VOWEL
-                                    : (isTibetan
-                                       ? TGCPair.CONSONANTAL_WITHOUT_VOWEL
-                                       : TGCPair.OTHER)))));
+                              ? TGCPair.TYPE_OTHER
+                              : (isSanskrit
+                                 ? TGCPair.TYPE_SANSKRIT
+                                 : (isTibetan
+                                    ? TGCPair.TYPE_TIBETAN
+                                    : TGCPair.TYPE_OTHER))));
             pl.add(tp);
             if (add_U0F7F) {
                 indexList.add(new Integer(index));
-                pl.add(new TGCPair("H", TGCPair.OTHER));
+                pl.add(new TGCPair("H", null, TGCPair.TYPE_OTHER));
             }
         }
     }
diff --git a/source/org/thdl/tib/text/ttt/TParseTree.java b/source/org/thdl/tib/text/ttt/TParseTree.java
index ea83648..2dffa42 100644
--- a/source/org/thdl/tib/text/ttt/TParseTree.java
+++ b/source/org/thdl/tib/text/ttt/TParseTree.java
@@ -91,7 +91,7 @@ class TParseTree {
         ParseIterator pi = getParseIterator();
         while (pi.hasNext()) {
             TStackList sl = pi.next();
-            if (sl.isLegalTshegBar().isLegal) {
+            if (sl.isLegalTshegBar(false).isLegal) {
                 sll.add(sl);
             }
         }
@@ -118,12 +118,12 @@ class TParseTree {
      *  a unique non-illegal parse, you get it.  If there's not a
      *  unique answer, null is returned. */
     // {TZANDRA} is not solved by this, DLC NOW.  Solve PADMA PROBLEM!
-
     // DLC by using this we can get rid of single-sanskrit-gc, eh?
     public TStackList getBestParse() {
-        TStackListList up = getUniqueParse();
+        TStackListList up = getUniqueParse(false);
         if (up.size() == 1)
             return up.get(0);
+
         up = getNonIllegalParses();
         int sz = up.size();
         if (sz == 1) {
@@ -192,14 +192,17 @@ class TParseTree {
      *  legal parses if there two or more equally good parses.  By
      *  &quot;legal&quot;, we mean a sequence of stacks that is legal
      *  by the rules of Tibetan tsheg bar syntax (sometimes called
-     *  spelling). */
-    public TStackListList getUniqueParse() {
+     *  spelling).
+     *  @param noPrefixTests true if you want to pretend that every
+     *  stack can take every prefix, which is not the case in
+     *  reality */
+    public TStackListList getUniqueParse(boolean noPrefixTests) {
         TStackListList allLegalParses = new TStackListList(2); // save memory
         TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
         ParseIterator pi = getParseIterator();
         while (pi.hasNext()) {
             TStackList sl = pi.next();
-            BoolPair bpa = sl.isLegalTshegBar();
+            BoolPair bpa = sl.isLegalTshegBar(noPrefixTests);
             if (bpa.isLegal) {
                 if (bpa.isLegalAndHasAVowelOnRoot)
                     legalParsesWithVowelOnRoot.add(sl);
@@ -253,13 +256,23 @@ class TParseTree {
     public String getWarning(boolean paranoid,
                              TPairList pl,
                              String originalACIP) {
-        TStackListList up = getUniqueParse();
+
+        {
+            TStackList bestParse = getBestParse();
+            TStackListList noPrefixTestsUniqueParse = getUniqueParse(true);
+            if (noPrefixTestsUniqueParse.size() == 1
+                && !noPrefixTestsUniqueParse.get(0).equals(bestParse)) {
+                return "Warning: We're going with " + bestParse + ", but only because our knowledge of prefix rules says that " + noPrefixTestsUniqueParse.get(0) + " is not a legal Tibetan tsheg bar (\"syllable\")";
+            }
+        }
+
+        TStackListList up = getUniqueParse(false);
         if (null == up || up.size() != 1) {
             boolean isLastStack[] = new boolean[1];
             TStackListList nip = getNonIllegalParses();
             if (nip.size() != 1) {
                 if (null == getBestParse()) {
-                    return "There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
+                    return "Warning: There's not even a unique, non-illegal parse for ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "}";
                 } else {
                     if (getBestParse().hasStackWithoutVowel(pl, isLastStack)) {
                         if (isLastStack[0]) {
@@ -269,7 +282,7 @@ class TParseTree {
                         }
                     }
                     if (paranoid) {
-                        return "Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
+                        return "Warning: Though the ACIP {" + ((null != originalACIP) ? originalACIP : recoverACIP()) + "} is unambiguous, it would be more computer-friendly if + signs were used to stack things because there are two (or more) ways to interpret this ACIP if you're not careful.";
                     }
                 }
             } else {
diff --git a/source/org/thdl/tib/text/ttt/TStackList.java b/source/org/thdl/tib/text/ttt/TStackList.java
index 1b01308..5db6847 100644
--- a/source/org/thdl/tib/text/ttt/TStackList.java
+++ b/source/org/thdl/tib/text/ttt/TStackList.java
@@ -125,15 +125,17 @@ class TStackList {
      *  Tibetan syntax (sometimes called rules of spelling).  If this
      *  is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will
      *  be true if and only if there is an explicit {A} vowel on the
-     *  root stack. */
-    public BoolPair isLegalTshegBar() {
-        // DLC handle PADMA and other Tibetanized Sanskrit fellows.  Right now we only handle single-stack guys.
+     *  root stack.
+     *  @param noPrefixTests true if you want to pretend that every
+     *  stack can take every prefix, which is not the case in
+     *  reality */
+    public BoolPair isLegalTshegBar(boolean noPrefixTests) {
+        // DLC handle PADMA and other Tibetanized Sanskrit fellows consistently.  Right now we only treat single-stack Sanskrit guys as legal.
 
         TTGCList tgcList = new TTGCList(this);
         StringBuffer warnings = new StringBuffer();
         String candidateType
-            = TibTextUtils.getClassificationOfTshegBar(tgcList, warnings);
-        // System.out.println("DLC: " + toString() + " has candidateType " + candidateType + " and warnings " + warnings);
+            = TibTextUtils.getClassificationOfTshegBar(tgcList, warnings, noPrefixTests);
 
         // preliminary answer:
         boolean isLegal = (candidateType != "invalid");