Added an unfinished ACIP->Tibetan converter. Once it works properly for ACIP, it can easily be made to work as a perfect EWTS Wylie->Tibetan converter. It has an extensive suite of tests for the existing functionality.
2003-08-10 19:30:07 +00:00 · 2003-08-10 19:30:07 +00:00 · e21d3774a9
commit e21d3774a9
parent 39e0435b6b
14 changed files with 8709 additions and 21 deletions
--- a/source/org/thdl/tib/text/package.html
+++ b/source/org/thdl/tib/text/package.html
@ -5,7 +5,7 @@

  @(#)package.html

-  Copyright 2001-2002 Tibetan and Himalayan Digital Library
+  Copyright 2001-2003 Tibetan and Himalayan Digital Library

  This software is the confidential and proprietary information of
  the Tibetan and Himalayan Digital Library. You shall use such
@ -18,12 +18,14 @@

 Provides classes and methods for dealing with Tibetan text.
 <p>
-Designed for use with the Tibetan Computer
-Company's free cross-platform TibetanMachineWeb fonts, this package
-contains methods for getting the Extended Wylie
-correspondences for each TibetanMachineWeb glyph, and for
-convert back and forth between Extended
-Wylie and TibetanMachineWeb.
+Designed for use with the Tibetan Computer Company's free
+cross-platform TibetanMachineWeb fonts, this package contains methods
+for getting the Extended Wylie correspondences for each
+TibetanMachineWeb glyph, and for converting back and forth between
+Extended Wylie and TibetanMachineWeb.  The TMW to Wylie conversion is
+perfect, but the Wylie to TMW is flawed, so use the code in package
+org.thdl.tib.text.ttt instead for serious work.  The Wylie to TMW here
+is more like a keyboard than a real Wylie to TMW conversion.
 <p>
 This package provides a variety of ways to store TibetanMachineWeb data,
 and includes methods to aid programmers who want to convert from
@ -34,5 +36,6 @@ keyboards. Four keyboards have been provided in this release,
 but users may also create their own keyboards.
 <h2>Related Documentation</h2>
@see <a href="../input/package-summary.html">org.thdl.tib.input</a>
+@see <a href="ttt/package-summary.html">org.thdl.tib.text.ttt</a>
 </body>
 </html>
--- a/source/org/thdl/tib/text/ttt/ACIPRules.java
+++ b/source/org/thdl/tib/text/ttt/ACIPRules.java
@ -0,0 +1,207 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import java.util.HashSet;
+import java.util.HashMap;
+
+/** Canonizes some facts regarding the ACIP transcription system:
+ *  which strings are ACIP consonants, which are ACIP "vowels", and
+ *  what the EWTS (Extended Wylie) equivalent of each one is.  Each
+ *  fact is stated once, in a { ACIP, EWTS } table; the membership
+ *  tests and the ACIP-to-EWTS lookups are both answered from maps
+ *  that are built from those tables when this class is initialized.
+ *  @author David Chandler */
+class ACIPRules {
+    /** {DZH} and {Ksh}, the longest consonants, have 3 characters, so
+     *  this is three. */
+    public static final int MAX_CONSONANT_LENGTH = 3;
+
+    /** {'im:}, the longest "vowel", has 4 characters, so this is
+     *  four. */
+    public static final int MAX_VOWEL_LENGTH = 4;
+
+    /** The ACIP base vowels and their EWTS equivalents.  The full
+     *  set of "vowels" is each of these alone and with 'm', ':', or
+     *  "m:" appended. */
+    private static final String[][] baseVowels = new String[][] {
+        // { ACIP, EWTS }:
+        { "A", "a" },
+        { "I", "i" },
+        { "U", "u" },
+        { "E", "e" },
+        { "O", "o" },
+        { "'I", "I" },
+        { "'U", "U" },
+        { "EE", "ai" },
+        { "OO", "au" },
+        { "i", "-i" },
+        { "'i", "-I" },
+        { "'A", "A" },
+        { "'O", "Ao" },
+        { "'E", "Ae" }
+        // DLC I'm on my own with 'O and 'E, but GANG'O appears
+        // and I wonder... so here are 'O and 'E.  It's
+        // consistent with 'I and 'A and 'U, at least.
+    };
+
+    /** The ACIP consonants (without any vowel) and their EWTS
+     *  equivalents.  This is the only list of consonants; {@link
+     *  #isConsonant(String)} and {@link
+     *  #getWylieForACIPConsonant(String)} both consult it. */
+    private static final String[][] baseConsonants = new String[][] {
+        // { ACIP, EWTS }:
+
+        // oddball:
+        { "V", "w" },
+
+        // more oddballs:
+        { "DH", "d+h" },
+        { "BH", "b+h" },
+        { "dH", "D+h" },
+        { "DZH", "dz+h" }, // longest, MAX_CONSONANT_LENGTH characters
+        { "Ksh", "k+Sh" }, // longest, MAX_CONSONANT_LENGTH characters
+        { "GH", "g+h" },
+
+        { "K", "k" },
+        { "KH", "kh" },
+        { "G", "g" },
+        { "NG", "ng" },
+        { "C", "c" },
+        { "CH", "ch" },
+        { "J", "j" },
+        { "NY", "ny" },
+        { "T", "t" },
+        { "TH", "th" },
+        { "D", "d" },
+        { "N", "n" },
+        { "P", "p" },
+        { "PH", "ph" },
+        { "B", "b" },
+        { "M", "m" },
+        { "TZ", "ts" },
+        { "TS", "tsh" },
+        { "DZ", "dz" },
+        { "W", "w" },
+        { "ZH", "zh" },
+        { "Z", "z" },
+        { "'", "'" },
+        { "Y", "y" },
+        { "R", "r" },
+        { "L", "l" },
+        { "SH", "sh" },
+        { "S", "s" },
+        { "H", "h" },
+        { "A", "a" },
+        { "t", "T" },
+        { "th", "Th" },
+        { "d", "D" },
+        { "n", "N" },
+        { "sh", "Sh" }
+    };
+
+    /** Maps each ACIP "vowel" to its EWTS equivalent; gives O(1)
+     *  {@link #isVowel(String)} calls. */
+    private static final HashMap acipVowel2wylie = buildVowelMap();
+
+    /** Maps each ACIP consonant to its EWTS equivalent; gives O(1)
+     *  {@link #isConsonant(String)} calls. */
+    private static final HashMap acipConsonant2wylie = buildConsonantMap();
+
+    /** Builds the vowel map: every base vowel, alone and followed by
+     *  'm', ':', or "m:" (which become 'M', 'H', and "MH" in
+     *  EWTS). */
+    private static HashMap buildVowelMap() {
+        HashMap map = new HashMap(baseVowels.length * 4);
+        for (int i = 0; i < baseVowels.length; i++) {
+            String acip = baseVowels[i][0];
+            String ewts = baseVowels[i][1];
+            map.put(acip, ewts);
+            map.put(acip + "m", ewts + "M");
+            map.put(acip + ":", ewts + "H");
+            map.put(acip + "m:", ewts + "MH");
+            // DLC '\' for visarga? how shall we do \ the visarga? like a vowel or not?
+        }
+        return map;
+    }
+
+    /** Builds the consonant map straight from the consonant table. */
+    private static HashMap buildConsonantMap() {
+        HashMap map = new HashMap(baseConsonants.length * 2);
+        for (int i = 0; i < baseConsonants.length; i++)
+            map.put(baseConsonants[i][0], baseConsonants[i][1]);
+        return map;
+    }
+
+    /** Returns true if and only if s is an ACIP "vowel".  You can't
+     *  just call this any time -- A is a consonant and a vowel in
+     *  ACIP, so you have to call this in the right context.
+     *  @param s the candidate ACIP string; null yields false */
+    public static boolean isVowel(String s) {
+        return acipVowel2wylie.containsKey(s);
+    }
+
+    /** Returns true if and only if acip is an ACIP consonant (without
+     *  a vowel). For example, returns true for "K", but not for
+     *  "KA" or "X".
+     *  @param acip the candidate ACIP string; null yields false */
+    public static boolean isConsonant(String acip) {
+        return acipConsonant2wylie.containsKey(acip);
+    }
+
+    /** Returns the EWTS corresponding to the given ACIP consonant
+     *  (without the "A" vowel).  Returns null if there is no such
+     *  EWTS. */
+    static final String getWylieForACIPConsonant(String acip) {
+        return (String)acipConsonant2wylie.get(acip);
+    }
+
+    /** Returns the EWTS corresponding to the given ACIP "vowel".
+     *  Returns null if there is no such EWTS. */
+    static final String getWylieForACIPVowel(String acip) {
+        return (String)acipVowel2wylie.get(acip);
+    }
+}
--- a/source/org/thdl/tib/text/ttt/PackageTest.java
+++ b/source/org/thdl/tib/text/ttt/PackageTest.java
--- a/source/org/thdl/tib/text/ttt/ParseIterator.java
+++ b/source/org/thdl/tib/text/ttt/ParseIterator.java
@ -0,0 +1,100 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import java.util.ListIterator;
+import java.util.NoSuchElementException;
+import java.util.ArrayList;
+
+/** An object that can iterate over an {@link #TParseTree}.
+ *
+ *  @author David Chandler */
+class ParseIterator {
+    private ArrayList al = null;
+    private int sz;
+    private ListIterator[] iterators;
+    private boolean first = true;
+    private boolean hasNextParse = true;
+    /** Constructs a new ParseIterator that iterates over a list of
+     *  TStackListLists. */
+    ParseIterator(ArrayList al) {
+        this.al = al;
+        sz = al.size();
+        iterators = new ListIterator[sz];
+        hasNextParse = false;
+        for (int i = 0; i < sz; i++) {
+            iterators[i] = ((TStackListList)al.get(i)).listIterator();
+            if (iterators[i].hasNext())
+                hasNextParse = true;
+        }
+    }
+
+    /** Returns true if and only if there is another parse
+     *  available. */
+    boolean hasNext() {
+        return hasNextParse;
+    }
+
+    /** Returns the next available parse. */
+    TStackList next() {
+        if (!hasNextParse)
+            throw new NoSuchElementException("no parses left");
+        if (first) {
+            first = false;
+            TStackList x = new TStackList();
+            for (int i = 0; i < sz; i++) {
+                TStackList nextSL = (TStackList)iterators[i].next();
+                x.addAll(nextSL);
+            }
+
+            // The next guy is found by taking the previous item of
+            // each iterator.
+            hasNextParse = false;
+            for (int i = sz - 1; i >= 0; i--) {
+                if (iterators[i].hasNext()) {
+                    iterators[i].next();
+                    hasNextParse = true;
+                    break;
+                }
+            }
+            return x;
+        }
+
+        // Up the rightmost iterator you can.  If you can, reset all
+        // guys to the right of it.  If you can't, we're done.
+        TStackList x = new TStackList(sz);
+        hasNextParse = false;
+        for (int i = sz - 1; i >= 0; i--) {
+            TStackList prevSL = (TStackList)iterators[i].previous();
+            x.addAll(0, prevSL);
+            iterators[i].next();
+            if (!hasNextParse && iterators[i].hasNext()) {
+                hasNextParse = true;
+                iterators[i].next();
+                // Reset all iterators to the right of i.
+                for (int j = i + 1; j < sz; j++) {
+                    while (iterators[j].hasPrevious())
+                        iterators[j].previous();
+                    iterators[j].next();
+                }
+            }
+        }
+        return x;
+    }
+}
--- a/source/org/thdl/tib/text/ttt/TPair.java
+++ b/source/org/thdl/tib/text/ttt/TPair.java
@ -0,0 +1,170 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import org.thdl.util.ThdlDebug;
+
+/** An ordered pair used in ACIP-to-TMW conversion.  The left side is
+ *  the consonant or empty; the right side is the vowel, '+', or '-'.
+ *  Neither side changes after construction, and an absent side is
+ *  always stored as null, never as the empty string.
+ *  @author David Chandler */
+/* DLC BIG FIXME: make this package work for EWTS, not just ACIP. */
+class TPair {
+    /** The left side, or null if there is no left side.  That is, the
+     *  non-vowel, non-'m', non-':', non-'-', non-'+' guy. */
+    private final String l;
+    /** Returns the left side, or null if there is none. */
+    String getLeft() {
+        ThdlDebug.verify(!"".equals(l));
+        return l;
+    }
+
+    /** The right side. That is, the vowel, with 'm' or ':' "vowel"
+     *  after it if appropriate, or "-" (disambiguator), or "+"
+     *  (stacking), or null otherwise. */
+    private final String r;
+    /** Returns the right side, or null if there is none. */
+    String getRight() {
+        ThdlDebug.verify(!"".equals(r));
+        return r;
+    }
+
+    /** Constructs a new TPair with left side l and right side r.
+     *  Use null or the empty string to represent an absence. */
+    TPair(String l, String r) {
+        // Normalize, so that the rest of the class need only test
+        // for null:
+        this.l = (null != l && l.equals("")) ? null : l;
+        this.r = (null != r && r.equals("")) ? null : r;
+    }
+
+    /** Returns a nice String representation.  Returns "(D . E)" for
+     *  ACIP {DE}, e.g., and (l . r) in general. */
+    public String toString() {
+        return "("
+            + ((null == l) ? "" : l) + " . "
+            + ((null == r) ? "" : r) + ")";
+    }
+
+    /** Returns the number of ACIP characters that make up this
+     *  TPair. */
+    int size() {
+        return (((l == null) ? 0 : l.length())
+                + ((r == null) ? 0 : r.length()));
+    }
+
+    /** Returns a TPair that is like this one except that it is
+     *  missing N characters.  The characters are taken from r, the
+     *  right side, first and from l, the left side, second.
+     *  @throws IllegalArgumentException if N is out of range, i.e.
+     *  less than one or greater than {@link #size()} */
+    TPair minusNRightmostACIPCharacters(int N)
+        throws IllegalArgumentException
+    {
+        int sz;
+        String newL = l, newR = r;
+        if (N > size())
+            throw new IllegalArgumentException("Don't have that many to remove.");
+        if (N < 1)
+            throw new IllegalArgumentException("You should't call this if you don't want to remove any.");
+        if (null != r && (sz = r.length()) > 0) {
+            int min = Math.min(sz, N);
+            newR = r.substring(0, sz - min);
+            N -= min;
+        }
+        if (N > 0) {
+            // N <= size() was checked above, so l is non-null here
+            // and has at least N characters left to give up.
+            sz = l.length();
+            newL = l.substring(0, sz - N);
+        }
+        return new TPair(newL, newR);
+    }
+
+    /** Returns true if and only if this is nonempty and l, if
+     *  present, is a legal ACIP consonant, and r, if present, is a
+     *  legal ACIP vowel.  A right side of "+" or "-" is not a vowel,
+     *  so such a pair is not legal in this sense. */
+    boolean isLegal() {
+        if (size() < 1)
+            return false;
+        if (null != l && !ACIPRules.isConsonant(l))
+            return false;
+        // The right side is the one that must be a vowel.
+        if (null != r && !ACIPRules.isVowel(r))
+            return false;
+        return true;
+    }
+
+    /** Returns true if and only if this pair could be a Tibetan
+     *  prefix: the left side is one of ', M, B, D, or G, and the
+     *  right side is absent, "-", or "A". */
+    boolean isPrefix() {
+        return (null != l
+                && ((null == r || "".equals(r))
+                    || "-".equals(r)
+                    || "A".equals(r)) // DLC though check for BASKYABS and warn because BSKYABS is more common
+                && ("'".equals(l)
+                    || "M".equals(l)
+                    || "B".equals(l)
+                    || "D".equals(l)
+                    || "G".equals(l)));
+    }
+
+    /** Returns true if and only if this pair is merely a
+     *  disambiguator, i.e. it has no left side and its right side is
+     *  "-". */
+    boolean isDisambiguator() {
+        return ("-".equals(r) && getLeft() == null);
+    }
+
+    /** Returns a TPair that is like this pair except that it has
+     *  a "+" on the right if this pair is empty on the right and is
+     *  empty on the right if this pair has a disambiguator (i.e., a
+     *  '-') on the right.  May return itself (but never mutates this
+     *  instance). */
+    TPair insideStack() {
+        if (null == getRight())
+            return new TPair(getLeft(), "+");
+        else if ("-".equals(getRight()))
+            return new TPair(getLeft(), null);
+        else
+            return this;
+    }
+
+    /** Returns true if this pair contains a Tibetan number, i.e. if
+     *  its left side is exactly one character in the range '0'
+     *  through '9'. */
+    boolean isNumeric() {
+        if (l == null || l.length() != 1)
+            return false;
+        char ch = l.charAt(0);
+        return (ch >= '0' && ch <= '9');
+    }
+
+    /** Returns the EWTS Wylie that corresponds to this pair.  A left
+     *  side that is a digit is passed through unchanged; a side with
+     *  no known EWTS equivalent contributes nothing.  Untested. */
+    String getWylie() {
+        String leftWylie = null;
+        if (getLeft() != null) {
+            leftWylie = ACIPRules.getWylieForACIPConsonant(getLeft());
+            if (leftWylie == null) {
+                if (isNumeric())
+                    leftWylie = getLeft();
+            }
+        }
+        String rightWylie = null;
+        if ("-".equals(getRight()))
+            rightWylie = "."; // the EWTS disambiguator
+        else if ("+".equals(getRight()))
+            rightWylie = "+";
+        else if (getRight() != null)
+            rightWylie = ACIPRules.getWylieForACIPVowel(getRight());
+        if (null == leftWylie) leftWylie = "";
+        if (null == rightWylie) rightWylie = "";
+        return leftWylie + rightWylie;
+    }
+}
--- a/source/org/thdl/tib/text/ttt/TPairList.java
+++ b/source/org/thdl/tib/text/ttt/TPairList.java
@ -0,0 +1,579 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import org.thdl.tib.text.TibetanMachineWeb;
+import org.thdl.tib.text.TGCPair;
+import org.thdl.util.ThdlDebug;
+
+import java.util.ArrayList;
+
+/** A list of {@link TPair TPairs}, typically corresponding to
+ *  one tsheg bar.  <i>l</i>' in the design doc is an TPairList.
+ *
+ *  @author David Chandler */
+class TPairList {
+    /** FIXME: change me and see if performance improves. */
+    private static final int INITIAL_SIZE = 1;
+
+    /** a fast, non-thread-safe, random-access list implementation: */
+    private ArrayList al;
+
+    /** Creates a new list containing just p. */
+    public TPairList(TPair p) {
+        al = new ArrayList(1);
+        add(p);
+    }
+
+    /** Creates an empty list. */
+    public TPairList() {
+        al = new ArrayList(INITIAL_SIZE);
+    }
+
+    /** Creates an empty list with the capacity to hold N items. */
+    public TPairList(int N) {
+        al = new ArrayList(N);
+    }
+
+    /** Returns the ith pair in this list. */
+    public TPair get(int i) { return (TPair)al.get(i); }
+
+    /** Adds p to the end of this list. */
+    public void add(TPair p) {
+        if (p == null || (p.getLeft() == null && p.getRight() == null))
+            throw new IllegalArgumentException("p is weird");
+        al.add(p);
+    }
+
+    /** Prepends p to the current list of TPairs.  Unlike {@link
+     *  #add(TPair)}, this does not check p: a null p, or a pair with
+     *  neither a left nor a right side, is inserted as-is. */
+    public void prepend(TPair p) {
+        al.add(0, p);
+    }
+
+    /** Returns the number of TPairs in this list. */
+    public int size() { return al.size(); }
+
+    /** Returns a human-readable representation.
+     *  @return something like [(R . ), (D . O)] */
+    public String toString2() {
+        return al.toString();
+    }
+
+    /** Returns a human-readable representation in which each pair is
+     *  wrapped in braces, like {G}{YA} or {G-}{YA}.  An absent side
+     *  of a pair contributes nothing. */
+    public String toString() {
+        StringBuffer out = new StringBuffer();
+        for (int k = 0, n = size(); k < n; k++) {
+            TPair pair = get(k);
+            String left = pair.getLeft();
+            String right = pair.getRight();
+            out.append('{');
+            if (left != null)
+                out.append(left);
+            if (right != null)
+                out.append(right);
+            out.append('}');
+        }
+        return out.toString();
+    }
+
+    /** Returns the ACIP corresponding to this TPairList.  It will
+     *  be as ambiguous as the input.  It may have more disambiguators
+     *  than the original, such as in the case of the ACIP {1234}. */
+    String recoverACIP() {
+        StringBuffer original = new StringBuffer();
+        int sz = size();
+        for (int i = 0; i < sz; i++) {
+            TPair p = get(i);
+            if (p.getLeft() != null)
+                original.append(p.getLeft());
+            if (p.getRight() != null)
+                original.append(p.getRight());
+        }
+        return original.toString();
+    }
+
+    /** Returns true if some pair in this list is ( . <vowel>), i.e.
+     *  has no left side and is not a mere disambiguator, or is
+     *  (A . ).  Those are the two simple errors you encounter if you
+     *  interpret DAA or TAA or DAI or DAE the wrong way. */
+    boolean hasSimpleError() {
+        for (int k = 0, n = size(); k < n; k++) {
+            TPair pair = get(k);
+            String left = pair.getLeft();
+            String right = pair.getRight();
+            boolean vowelWithoutConsonant
+                = (left == null) && !"-".equals(right);
+            boolean bareA = "A".equals(left) && (right == null);
+            if (vowelWithoutConsonant || bareA)
+                return true;
+        }
+        return false;
+    }
+
+// DLC [THE FOLLOWIN... appears, so [#comment] or [comment] is possible. [BLANK PAGE] [MISSING PAGE] [FIRST] [SECOND] [DD1] [DD2] [46A.2] [THE ... [FOLLOWING... [PAGE ... [THESE ... @[7B] [SW: OK] [A FIRST... [ADDENDUM... [END ... [Additional [Some [Note [MISSING [DDD] [INCOMPLETE [LINE [DATA 
+// [A pair of ... which is part of the text! S0200A.ACE
+// [D] is a correction, eh?
+
+
+// DLC BDE 'BA' ZHIG RGYUN DU BSTEN, ,YENGS KYANG THUB NA [GNYEN PO,)
+//     'BYONGS [BLO,) S0375M.ACT
+
+
+// S0011N.ACT contains [SMON TSIG 'DI'I RTZOM MING MI GSAL,], why the brackets?  IS all this really a correction? DLC?
+// DLC: what are () for?
+
+    /** Finds errors so simple that they can be detected without using
+     *  the rules of Tibetan spelling (i.e., tsheg bar syntax).
+     *  Returns an error message, or null if there is no error that
+     *  you can find without the help of tsheg bar syntax rules. */
+    // DLC RENAME
+        // DLC FIXME: 9BLTA is an error, numbers are all or nothing
+    String getACIPError() {
+        int sz = size();
+        if (0 == sz)
+            return "Warning, empty tsheg bar found while converting from ACIP!";
+        boolean first = true;
+        StringBuffer rv = null;
+        boolean mustBeEntirelyNumeric = get(0).isNumeric();
+        for (int i = 0; i < sz; i++) {
+            TPair p = get(i);
+            if (mustBeEntirelyNumeric != p.isNumeric())
+                return "Cannot convert ACIP " + recoverACIP() + " because it contains a number but also a non-number.";
+
+            if ((i == 0 && "V".equals(p.getLeft()))
+                || (i > 0 && "V".equals(p.getLeft())
+                    && (null != get(i - 1).getRight()
+                        && !"+".equals(get(i - 1).getRight())))) {
+                if (first) {
+                    first = false;
+                    rv = new StringBuffer("Cannot convert ACIP ");
+                    rv.append(recoverACIP());
+                    rv.append(" because {V}, wa-zur, appears without being subscribed to a consonant.");
+                } else {
+                    rv.append("; also, {V}, wa-zur, appears without being subscribed to a consonant");
+                }
+            } else if ("A".equals(p.getLeft()) && (null == p.getRight() || "".equals(p.getRight()))) {
+                if (first) {
+                    first = false;
+                    rv = new StringBuffer("Cannot convert ACIP ");
+                    rv.append(recoverACIP());
+                    rv.append(" because we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.");
+                } else {
+                    rv.append("; also, we would be required to assume that {A} is a consonant, when it is not clear if it is a consonant or a vowel.");
+                }
+            } else if ((null == p.getLeft() && !"-".equals(p.getRight()))
+                       || (null != p.getLeft()
+                           && !ACIPRules.isConsonant(p.getLeft())
+                           && !p.isNumeric())) {
+                if (first) {
+                    first = false;
+                    rv = new StringBuffer("Cannot convert ACIP ");
+                    rv.append(recoverACIP());
+                    rv.append(" because ");
+                    if (null == p.getLeft()) {
+                        rv.append(p.getRight());
+                        rv.append(" is a \"vowel\" without an associated consonant");
+                    } else {
+                        rv.append(p.getLeft());
+                        rv.append(" is not an ACIP consonant");
+                    }
+                } else {
+                    if (null == p.getLeft()) {
+                        rv.append("; also, ");
+                        rv.append(p.getRight());
+                        rv.append(" is an ACIP \"vowel\" without an associated consonant");
+                    } else {
+                        rv.append("; also, ");
+                        rv.append(p.getLeft());
+                        rv.append(" is not an ACIP consonant");
+                    }
+                }
+            }
+        }
+        if ("+".equals(get(sz - 1).getRight())) {
+            if (first) {
+                first = false;
+                rv = new StringBuffer("Cannot convert ACIP ");
+                rv.append(recoverACIP());
+                rv.append(" because it ends with a {+}.");
+            } else {
+                rv.append("; also, it ends with a {+}.");
+            }
+        }
+
+        // DLC really this is a warning, not an error:
+        if ("-".equals(get(sz - 1).getRight())) {
+            if (first) {
+                first = false;
+                rv = new StringBuffer("Cannot convert ACIP ");
+                rv.append(recoverACIP());
+                rv.append(" because it ends with a {-}.");
+            } else {
+                rv.append("; also, it ends with a {-}.");
+            }
+        }
+
+        return (rv == null) ? null : rv.toString();
+    }
+
+    /** Returns true if and only if either x is an TPairList object
+     *  representing the same TPairs in the same order or x is a
+     *  String that is equals to the result of {@link #toString()}. */
+    public boolean equals(Object x) {
+        if (x instanceof TPairList) {
+            return al.equals(((TPairList)x).al);
+        } else if (x instanceof String) {
+            return toString().equals(x) || toString2().equals(x);
+        }
+        return false;
+    }
+
+    /** Returns true if and only if this list is empty. */
+    public boolean isEmpty() { return al.isEmpty(); }
+
+    /** Returns a hashCode appropriate for use with our {@link
+     *  #equals(Object)} method. */
+    public int hashCode() { return al.hashCode(); }
+
+    private static final int STOP_STACK = 0;
+    private static final int KEEP_STACKING = 1;
+    private static final int ALWAYS_KEEP_STACKING = 2;
+    private static final int ALWAYS_STOP_STACKING = 3;
+
+    // DLC TEST: BA'I has exactly two syntactically legal parses but just one TStackList.
+
+    /** Builds the parse tree for this list of pairs: one
+     *  TStackListList per stretch of pairs, holding the groupings of
+     *  that stretch into stacks that pass {@link
+     *  #isLegalTibetanOrSanskritStack()}.  When a pair with a null
+     *  right side is met (and more than one pair remains from
+     *  firstPair on), the stretch from firstPair through the next
+     *  pair whose right side is neither null nor '+' is ambiguous,
+     *  because each blank pair may or may not stack with its
+     *  successor; every combination is tried and the legal ones are
+     *  kept.  Other stretches have exactly one grouping.
+     *
+     *  @return the tree; an empty tree if this list is empty, if only
+     *  disambiguators were found, or if an ambiguous stretch fails
+     *  the (j-i+1) &gt; 13 size guard; or null if some stretch has no
+     *  grouping into legal stacks at all */
+    public TParseTree getParseTree() {
+        TParseTree pt = new TParseTree();
+        int sz = size();
+        int firstPair = 0; // index of the first pair not yet placed in the tree
+        for (int i = 0; i < sz; i++) {
+
+            // We treat [(B . ), (G . +), (K . ), (T . A)] as if it
+            // could be {B+G+K+T} or {B}{G+K}{T} or {B+G+K}{T} or
+            // {B}{G+K+T} (modulo stack legality); we're conservative.
+            // (Though some stacks won't be legal.)
+
+
+            if (ddebug) System.out.println("i is " + i);
+            TPair p = get(i);
+            if (p.getRight() == null && firstPair + 1 < sz) {
+                // Here's the ambiguity.  Let's fill up blanks. (B . )
+                // (G . +) (K . A) could be {B+G+KA} or {BA}{G+KA}, so
+                // we go until we hit a vowel and then break into
+                // TPairLists.
+                int start = firstPair;
+                int blanks[] = new int[sz - start]; // we may not use all of this.
+                int j;
+                for (j = start; j < sz; j++) {
+                    TPair pj = get(j);
+                    boolean isBlank;
+                    if (ddebug) System.out.println("right guy is " + pj.getRight());
+                    if (pj.isDisambiguator())
+                        blanks[j-start] = ALWAYS_STOP_STACKING;
+                    else {
+                        if (!(isBlank = (pj.getRight() == null)) && !"+".equals(pj.getRight())) {
+                            if (ddebug) System.out.println("breaker breaker at j=" + j);
+                            break;
+                        }
+                        blanks[j-start] = isBlank ? STOP_STACK : ALWAYS_KEEP_STACKING;
+                    }
+                }
+                if (j >= sz) j = sz - 1; // the scan ran off the end; clamp j to the last pair
+
+                blanks[j-start] = ALWAYS_STOP_STACKING; // the stretch always ends after get(j)
+
+                // get(j) [corresponding to blanks[j-start]] is
+                // the last pair in the ambiguous stretch; get(start)
+                // [corresponding to blanks[0]] is the first.
+
+                // We'll end up doing at most 2**(j-start+1) iterations;
+                // the guard below estimates that as 2**(j-i+1).  If
+                // that's going to be too many, let's just say there's
+                // no legal parse (an empty tree). FIXME: nice error message.
+                if (ddebug) System.out.println("ddebug: we're going to do 2^" + (j-i+1) + " [or " + (1 << (j-i+1)) + "] wacky iterations!");
+                if ((j-i+1) > 13) // if you don't use 13, then change PackageTest.testSlowestTshegBar().
+                    return new TParseTree(); // gives up with an empty tree, discarding pt
+
+                boolean keepGoing = true;
+                TStackListList sll = new TStackListList();
+                do {
+                    // Add the stack list currently specified by
+                    // blanks if all the stacks in it are legal.
+// DLC DELETE                    {
+//                         ArrayList x = new ArrayList((j-start+1));
+//                         for (int ii = 0; ii < (j-start+1); ii++)
+//                             x.add(new Integer(blanks[ii]));
+//                     }
+                    TStackList sl = new TStackList(sz - start);
+                    boolean illegal = false;
+                    TPairList currentStack = new TPairList();
+                    for (int k = 0; k < j-start+1; k++) {
+                        TPair pk = get(start + k);
+                        if (!pk.isDisambiguator()) {
+                            currentStack.add(pk.insideStack());
+                            if (blanks[k] == STOP_STACK) { // only STOP_STACK closes a stack here; every other code keeps accumulating
+                                if (currentStack.isLegalTibetanOrSanskritStack())
+                                    sl.add(currentStack.asStack());
+                                else {
+                                    illegal = true;
+                                    break;
+                                }
+                                currentStack = new TPairList();
+                            }
+                        }
+                    }
+                    if (!illegal && !currentStack.isEmpty()) { // close the stack still open at the end of the stretch
+                        if (currentStack.isLegalTibetanOrSanskritStack()) {
+                            TPairList stack = currentStack.asStack();
+                            if (ddebug) System.out.println("adding currentStack " + stack + " to sl " + sl);
+                            sl.add(stack);
+                        } else {
+                            illegal = true;
+                        }
+                    }
+                    if (!illegal) {
+                        if (ddebug) System.out.println("adding sl " + sl + " to sll " + sll);
+                        sll.add(sl);
+                    }
+
+                    // Update blanks.  Think of this as doing base 2
+                    // arithmetic where STOP_STACK is zero,
+                    // KEEP_STACKING is one, and ALWAYS_KEEP_STACKING
+                    // and ALWAYS_STOP_STACKING are digits we cannot
+                    // modify.  We'll end up doing 2^M iterations,
+                    // where M is the number of fields in blanks that
+                    // are not equal to ALWAYS_KEEP_STACKING or
+                    // ALWAYS_STOP_STACKING.
+                    keepGoing = false;
+                    for (int k = j-start; k >= 0; k--) {
+                        if (blanks[k] == STOP_STACK) {
+                            keepGoing = true;
+                            blanks[k] = KEEP_STACKING;
+                            // reset all digits to the right of k to
+                            // "zero":
+                            for (int m = k + 1; m < j-start+1; m++) {
+                                if (blanks[m] == KEEP_STACKING)
+                                    blanks[m] = STOP_STACK;
+                            }
+                            break;
+                        }
+                    }
+                } while (keepGoing);
+                if (sll.isEmpty())
+                    return null; // STXAL or shT+ZNAGN, e.g.
+                else {
+                    if (ddebug) System.out.println("adding sll " + sll + " to parse tree " + pt);
+                    pt.add(sll);
+                }
+                
+                if (ddebug) System.out.println("i is " + i + " and j is " + j + " and we are resetting so that i==j+1 next time.");
+                i = j; // the for loop's i++ then makes the next i equal j+1
+                firstPair = j + 1;
+            } else if ("+".equals(p.getRight())) {
+                // Keep firstPair where it is.
+            } else {
+                // Add all pairs in the range [firstPair, i].  Some
+                // pairs are stacks all by themselves, some pairs have
+                // '+' on the right and are thus just part of a stack.
+                // We'll add a whole number of stacks, though.
+                
+                // this is initialized to hold the max we might use:
+                TStackListList sll
+                    = new TStackListList(i - firstPair + 1);
+
+                TPairList currentStack = new TPairList();
+                for (int j = firstPair; j <= i; j++) {
+                    TPair pj = get(j);
+                    if (!pj.isDisambiguator()) {
+                        currentStack.add(pj.insideStack());
+                        if (!"+".equals(pj.getRight())) {
+                            if (currentStack.isLegalTibetanOrSanskritStack())
+                                sll.add(new TStackList(currentStack.asStack()));
+                            else {
+                                return null; // an unambiguous stack that is not legal: no parse at all
+                            }
+                            currentStack = new TPairList();
+                        }
+                    }
+                }
+                if (!currentStack.isEmpty())
+                    throw new Error("how can this happen? currentStack is " + currentStack);
+
+                if (!sll.isEmpty()) {
+                    if (ddebug) System.out.println("adding sll " + sll + " to parse tree " + pt);
+                    pt.add(sll);
+                    firstPair = i + 1;
+                } // else you probably have {G--YA} or something as
+                  // your tsheg bar.
+            }
+        }
+        return pt;
+    }
+
+    /** Returns true if and only if this list of TPairs can be
+     *  interpreted as a legal Tibetan stack or a legal Tibetanized
+     *  Sanskrit stack.  This is private because a precondition is
+     *  that no vowels or disambiguators appear except possibly in the
+     *  final pair. */
+    private boolean isLegalTibetanOrSanskritStack() {
+        StringBuffer tibetan = new StringBuffer();
+        StringBuffer sanskrit = new StringBuffer();
+        int sz = size();
+
+        // Special case because otherwise wa-zur alone would be seen
+        // as legal.
+        if (sz == 1 && "V".equals(get(0).getLeft()))
+            return false;
+
+        for (int i = 0; i < sz; i++) {
+            TPair p = get(i);
+            String ewts_form
+                = ACIPRules.getWylieForACIPConsonant(p.getLeft());
+            if (null == ewts_form) {
+                if (p.isNumeric())
+                    ewts_form = p.getLeft();
+            }
+            if (null == ewts_form) {
+                if (ddebug) System.out.println("testing " + toString2() + " for legality said false. numeric?" + p.isNumeric() + "[1]");
+                return false;
+            }
+            tibetan.append(ewts_form);
+            sanskrit.append(ewts_form);
+            if (i + 1 < sz) {
+                tibetan.append('-');
+                sanskrit.append('+');
+            }
+        }
+        boolean ans = 
+            (TibetanMachineWeb.hasGlyph(tibetan.toString())
+                || TibetanMachineWeb.hasGlyph(sanskrit.toString()));
+        if (ddebug) System.out.println("testing " + toString2() + " for legality said " + ans + " [2]; san is " + sanskrit + " tib is " + tibetan + ".");
+        return ans;
+    }
+    private static final boolean ddebug = false; // compile-time switch guarding the System.out tracing in this class
+
+    /** Normalizes the end of this list in place so that it can stand
+     *  as a complete stack: if the last pair has the stacking
+     *  operator ('+') or a disambiguator ('-') on its right, that
+     *  pair is replaced by one with the same left side and a null
+     *  right side.  A last pair that is empty on the right or
+     *  carries a vowel is left alone, as is an empty list.
+     *  @return this instance */
+    private TPairList asStack() {
+        if (isEmpty())
+            return this;
+
+        int lastIndex = size() - 1;
+        TPair tail = get(lastIndex);
+        String right = tail.getRight();
+        if ("+".equals(right) || "-".equals(right))
+            al.set(lastIndex, new TPair(tail.getLeft(), null));
+        return this;
+    }
+
+    /** Adds the TGCPairs corresponding to this list to the end of
+     *  pl. Some TPairs correspond to more than one TGCPair
+     *  ({AA:}); some TGCPairs correspond to more than one TPair
+     *  ({G+YA}).  To keep track, indexList will be appended to in
+     *  lockstep with pl.  index (wrapped as an {@link
+     *  java.lang#Integer}) will be appended to indexList once each
+     *  time we append to pl.  This assumes that this TPairList
+     *  corresponds to exactly one Tibetan grapheme cluster (i.e.,
+     *  stack).  Note that U+0F7F (ACIP {:}) is part of a stack, not a
+     *  stack all on its own. */
+    void populateWithTGCPairs(ArrayList pl, ArrayList indexList, int index) {
+        int sz = size();
+        if (sz == 0) {
+            return;
+        } else {
+            // drop the disambiguator, if there is one.
+
+            boolean isNumeric = false;
+            StringBuffer lWylie = new StringBuffer();
+            int i;
+            // All pairs but the last:
+            for (i = 0; i + 1 < sz; i++) {
+                lWylie.append(get(i).getWylie());
+                if (get(i).isNumeric())
+                    isNumeric = true;
+            }
+
+            // The last pair:
+            TPair p = get(i);
+            ThdlDebug.verify(!"+".equals(p.getRight()));
+            int where;
+            boolean add_U0F7F = false;
+            if (p.getRight() != null
+                && (where = p.getRight().indexOf(':')) >= 0) {
+                // this ':' guy is his own TGCPair.
+                add_U0F7F = true;
+                StringBuffer rr = new StringBuffer(p.getRight());
+                rr.deleteCharAt(where);
+                p = new TPair(p.getLeft(), rr.toString());
+            }
+            boolean hasNonAVowel = (!"A".equals(p.getRight()) && null != p.getRight());
+            String thislWylie = ACIPRules.getWylieForACIPConsonant(p.getLeft());
+            if (thislWylie == null) {
+                char ch;
+                if (p.isNumeric()) {
+                    thislWylie = p.getLeft();
+                    isNumeric = true;
+                }
+            }
+
+            if (null == thislWylie) throw new Error("BADNESS AT MAXIMUM: p is " + p + " and thislWylie is " + thislWylie);
+            lWylie.append(thislWylie);
+            StringBuffer ll = new StringBuffer(lWylie.toString());
+            int ww;
+            // DLC NOW: what about fixed-form RA on top???  test it.
+            while ((ww = ll.indexOf("+")) >= 0)
+                ll.deleteCharAt(ww);
+            boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(ll.toString());
+            boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(lWylie.toString());
+            if (!isTibetan && !isSanskrit && !isNumeric && true) {
+                System.out.println("DLC: OTHER for " + lWylie + " with vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
+            }
+            if (isTibetan && isSanskrit) isSanskrit = false; // RVA, e.g.
+            if (true && hasNonAVowel && ACIPRules.getWylieForACIPVowel(p.getRight()) == null) {
+                System.out.println("DLC: vowel " + ACIPRules.getWylieForACIPVowel(p.getRight()) + " and p.getRight()=" + p.getRight());
+            }
+            TGCPair tp;
+            indexList.add(new Integer(index));
+            tp = new TGCPair(lWylie.toString()
+                             + (hasNonAVowel
+                                ? ACIPRules.getWylieForACIPVowel(p.getRight())
+                                : ""),
+                             (isNumeric
+                              ? TGCPair.OTHER
+                              : (hasNonAVowel
+                                 ? (isSanskrit
+                                    ? TGCPair.SANSKRIT_WITH_VOWEL
+                                    : (isTibetan
+                                       ? TGCPair.CONSONANTAL_WITH_VOWEL
+                                       : TGCPair.OTHER))
+                                 : (isSanskrit
+                                    ? TGCPair.SANSKRIT_WITHOUT_VOWEL
+                                    : (isTibetan
+                                       ? TGCPair.CONSONANTAL_WITHOUT_VOWEL
+                                       : TGCPair.OTHER)))));
+            pl.add(tp);
+            if (add_U0F7F) {
+                indexList.add(new Integer(index));
+                pl.add(new TGCPair("H", TGCPair.OTHER));
+            }
+        }
+    }
+}
+// DLC FIXME: handle 'o' and 'x', e.g. KAo and NYAx.
--- a/source/org/thdl/tib/text/ttt/TPairListFactory.java
+++ b/source/org/thdl/tib/text/ttt/TPairListFactory.java
@ -0,0 +1,167 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+/** A static factory that turns Strings of ACIP into {@link
+ *  TPairList TPairLists}.
+ *  @author David Chandler */
+class TPairListFactory {
+    /** Not instantiable: every member is static. */
+    private TPairListFactory() { }
+
+    /** Breaks an ACIP tsheg bar (roughly a &quot;syllable&quot;) into
+     *  chunks and returns them as a new TPairList; this computes l'
+     *  (for you design doc enthusiasts).
+     *
+     *  <p>The algorithm is greedy with minimal backtracking: take the
+     *  biggest TPair you can from the front and recurse on the rest.
+     *  If the rest then has a simple error, give characters back
+     *  from the right of the head, one at a time, until the head is
+     *  still legal and the rest is free of simple errors.  For
+     *  example, G'A'I becomes [(G . 'A), (' . I)], and TAA first
+     *  yields the head (T . A), but the rest would be suboptimal, so
+     *  we back off to (T . ) and end up with [(T . ), (A . A)].
+     *  &quot;Suboptimal&quot; means TPairList.hasSimpleError(), which
+     *  looks for (A . ) and ( . &lt;vowel&gt;) in the rest.  If no
+     *  amount of giving back helps, the first greedy answer is
+     *  returned.
+     *  @param acip a string of ACIP with no punctuation in it
+     *  @return the chunks, in order; an empty list for the empty
+     *  string */
+    static TPairList breakACIPIntoChunks(String acip) {
+
+        // the recursion bottoms out here:
+        if ("".equals(acip))
+            return new TPairList();
+
+        StringBuffer buf = new StringBuffer(acip);
+        int consumedOut[] = new int[1];
+        TPair greedyHead = getFirstConsonantAndVowel(buf, consumedOut);
+        int consumed = consumedOut[0];
+
+        TPairList greedyTail = breakACIPIntoChunks(buf.substring(consumed));
+        if (greedyTail.hasSimpleError()) {
+            for (int giveBack = 1; giveBack < consumed; giveBack++) {
+                // Hand giveBack characters over to the rest, provided
+                // the shortened head is still legal...
+                TPair shorterHead
+                    = greedyHead.minusNRightmostACIPCharacters(giveBack);
+                if (!shorterHead.isLegal())
+                    continue;
+                // ...and the lengthened rest is now free of simple
+                // errors.
+                TPairList longerTail
+                    = breakACIPIntoChunks(buf.substring(consumed - giveBack));
+                if (longerTail.hasSimpleError())
+                    continue;
+                longerTail.prepend(shorterHead);
+                return longerTail;
+            }
+            // Nothing helped; fall through to the greedy answer.
+        }
+        greedyTail.prepend(greedyHead);
+        return greedyTail;
+    }
+
+    /** Returns the largest TPair we can make from the left end of
+     *  acip.  The pair is of size zero if and only if acip is empty;
+     *  otherwise either the left or the right component may be
+     *  empty.  When acip starts with something like {NA+YA}, acip is
+     *  mutated into {N+YA} and (N . +) is returned.  For {NE+YA},
+     *  acip is not mutated and nothing intelligent happens; a later
+     *  phase will need to turn that into {N+YE} (DLC).
+     *  @param acip the ACIP to read from; possibly mutated as
+     *  described above
+     *  @param howMuch howMuch[0] is set to the number of characters
+     *  of acip (as it stands on return) that this call has consumed
+     *  @return the first pair */
+    private static TPair getFirstConsonantAndVowel(StringBuffer acip,
+                                                      int howMuch[]) {
+        // It is *not* true that a legal (per TPair.isLegal()) prefix
+        // of length N implies a legal prefix of length N-1; think of
+        // {shA} and {KshA}.  We sidestep the issue by simply looking
+        // for the longest consonant first and then shorter ones, and
+        // likewise for vowels.
+
+        final int total = acip.length();
+        if (total == 0) {
+            howMuch[0] = 0;
+            return new TPair(null, null);
+        }
+
+        final char first = acip.charAt(0);
+        if (first == '-') {
+            howMuch[0] = 1;
+            return new TPair(null, "-");
+        }
+
+        // Numbers never appear in stacks, so if you see 1234, that's
+        // like seeing 1-2-3-4.
+        if (first >= '0' && first <= '9') {
+            howMuch[0] = 1; // not 2...
+            return new TPair(acip.substring(0, 1), (total == 1) ? null : "-");
+        }
+
+        String consonant = longestConsonantPrefix(acip, total);
+        int consonantLength = (consonant == null) ? 0 : consonant.length();
+
+        // A consonant directly followed by '-' or '+' is a complete
+        // pair; the operator is consumed too.
+        if (consonant != null && total > consonantLength) {
+            char next = acip.charAt(consonantLength);
+            if (next == '-' || next == '+') {
+                howMuch[0] = consonantLength + 1;
+                return new TPair(consonant, (next == '-') ? "-" : "+");
+            }
+        }
+
+        String vowel = longestVowelAt(acip, consonantLength, total);
+
+        // Treat {BATA+SA'I} like {BAT+SA'I}: drop the 'A' that sits
+        // between the consonant and the '+'.
+        if (consonant != null && "A".equals(vowel)) {
+            int afterA = consonantLength + "A".length();
+            if (afterA < total && acip.charAt(afterA) == '+') {
+                acip.deleteCharAt(afterA - 1);
+                howMuch[0] = consonantLength + 1;
+                return new TPair(consonant, "+");
+            }
+        }
+
+        // A character that's not part of any vowel or consonant is
+        // returned on its own.
+        if (consonant == null && vowel == null) {
+            howMuch[0] = 1; // not 2...
+            // add a '-' to avoid exponentials:
+            return new TPair(acip.substring(0, 1), (total == 1) ? null : "-");
+        }
+
+        int vowelLength = (vowel == null) ? 0 : vowel.length();
+        howMuch[0] = consonantLength + vowelLength;
+        return new TPair(consonant, vowel);
+    }
+
+    /** Returns the longest prefix of acip (whose length is total)
+     *  that ACIPRules accepts as a consonant, or null if none. */
+    private static String longestConsonantPrefix(StringBuffer acip,
+                                                 int total) {
+        for (int len = Math.min(ACIPRules.MAX_CONSONANT_LENGTH, total);
+             len >= 1;
+             len--) {
+            String candidate = acip.substring(0, len);
+            if (ACIPRules.isConsonant(candidate))
+                return candidate;
+        }
+        return null;
+    }
+
+    /** Returns the longest substring of acip (whose length is total)
+     *  starting at from that ACIPRules accepts as a vowel, or null if
+     *  none. */
+    private static String longestVowelAt(StringBuffer acip, int from,
+                                         int total) {
+        for (int len = Math.min(ACIPRules.MAX_VOWEL_LENGTH, total - from);
+             len >= 1;
+             len--) {
+            String candidate = acip.substring(from, from + len);
+            if (ACIPRules.isVowel(candidate))
+                return candidate;
+        }
+        return null;
+    }
+}
+
+
+// DLC strip out [#...] comments; test for nested comments
+
+// DLC see Translit directory on ACIP v4 CD-ROM
--- a/source/org/thdl/tib/text/ttt/TParseTree.java
+++ b/source/org/thdl/tib/text/ttt/TParseTree.java
@ -0,0 +1,200 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import java.util.ArrayList;
+
+/** A list of non-empty {@link TStackListList TStackListLists}
+ *  representing all the ways you could break up a tsheg bar of ACIP
+ *  into stacks (i.e., grapheme clusters).  A parse is obtained by
+ *  picking one TStackList from each TStackListList, in order.
+ *
+ *  @author David Chandler */
+class TParseTree {
+    /** a fast, non-thread-safe, random-access list implementation: */
+    private ArrayList al = new ArrayList();
+
+    /** Creates an empty list. */
+    public TParseTree() { }
+
+    /** Returns the ith TStackListList in this list. */
+    public TStackListList get(int i) { return (TStackListList)al.get(i); }
+
+    /** Adds p to the end of this list.
+     *  @param p a non-empty TStackListList
+     *  @throws IllegalArgumentException if p is empty */
+    public void add(TStackListList p)
+        throws IllegalArgumentException
+    {
+        if (p.isEmpty())
+            throw new IllegalArgumentException("p is empty");
+        al.add(p);
+    }
+
+    /** Returns the number of TStackListLists in this list.  See
+     * also {@link #numberOfParses()}, which gives a different
+     * interpretation of the size of this tree. */
+    public int size() { return al.size(); }
+
+    /** Returns the number of different parses one could make from
+     *  this parse tree, i.e. the product of the sizes of the
+     *  TStackListLists.  Returns zero if this list is empty. */
+    public int numberOfParses() {
+        if (al.isEmpty()) return 0;
+        int k = 1;
+        int sz = size();
+        for (int i = 0; i < sz; i++) {
+            k *= get(i).size();
+        }
+        return k;
+    }
+        
+    /** Returns the number of {@link TPair pairs} that are in a
+     *  parse of this tree.  Returns zero if this list is empty. */
+    // NOTE(review): as written this returns one plus the number of
+    // stacks in the first alternative of each TStackListList, which
+    // looks like a copy of the "k = 1" seed from numberOfParses().
+    // Left unchanged because callers are not visible here -- confirm
+    // what they expect before changing the seed to zero.
+    public int numberOfPairs() {
+        if (al.isEmpty()) return 0;
+        int k = 1;
+        int sz = size();
+        for (int i = 0; i < sz; i++) {
+            // Only the first alternative, get(i).get(0), is measured;
+            // this presumes the others would contribute the same
+            // amount.
+            k += get(i).get(0).size();
+        }
+        return k;
+    }
+        
+    /** Returns an iterator that will iterate over the {@link
+     *  #numberOfParses} available. */
+    public ParseIterator getParseIterator() {
+        return new ParseIterator(al);
+    }
+
+    /** Returns a list containing the legal parses of this parse tree.
+     *  By &quot;legal&quot;, we mean a sequence of stacks that is
+     *  legal by the rules of Tibetan tsheg bar syntax (sometimes
+     *  called spelling).  This will return the {G-YA} parse of {GYA}
+     *  as well as the {GYA} parse, so watch yourself. */
+    public TStackListList getLegalParses() {
+        TStackListList sll = new TStackListList(2); // save memory
+        ParseIterator pi = getParseIterator();
+        while (pi.hasNext()) {
+            TStackList sl = pi.next();
+            if (sl.isLegalTshegBar().isLegal) {
+                sll.add(sl);
+            }
+        }
+        return sll;
+    }
+
+    /** Returns a list containing the parses of this parse tree that
+     *  are not clearly illegal. */
+    public TStackListList getNonIllegalParses() {
+        TStackListList sll = new TStackListList(2); // save memory
+        ParseIterator pi = getParseIterator();
+        while (pi.hasNext()) {
+            TStackList sl = pi.next();
+            if (!sl.isClearlyIllegal()) {
+                sll.add(sl);
+            }
+        }
+        return sll;
+    }
+
+    /** Returns the best parse, if there is a unique parse that is
+     *  clearly preferred to other parses.  Basically, if {@link
+     *  #getUniqueParse()} yields exactly one parse, you get it.  If
+     *  it does not, but there is exactly one non-illegal parse, you
+     *  get that.  If there's not a unique answer, null is
+     *  returned. */
+    // {TZANDRA} is not solved by this, DLC NOW.  Solve PADMA PROBLEM!
+
+    // DLC by using this we can get rid of single-sanskrit-gc, eh?
+    public TStackList getBestParse() {
+        TStackListList up = getUniqueParse();
+        if (up.size() == 1)
+            return up.get(0);
+        up = getNonIllegalParses();
+        if (up.size() == 1)
+            return up.get(0);
+        return null;
+    }
+
+    /** Returns a list containing the unique legal parse of this parse
+     *  tree if there is a unique legal parse.  Note that {SRAS} has a
+     *  unique legal parse, though {SRS} has two equally good parses;
+     *  i.e., note that the {A} vowel is treated specially here
+     *  (unlike in {@link #getLegalParses()}). Returns an empty list
+     *  if there are no legal parses.  Returns a list containing all
+     *  legal parses if there are three or more equally good parses.
+     *  When exactly two candidates remain they are taken to be the
+     *  {G-YA} and {GYA} readings, and only the second one is
+     *  returned.  By &quot;legal&quot;, we mean a sequence of stacks
+     *  that is legal by the rules of Tibetan tsheg bar syntax
+     *  (sometimes called spelling).
+     *  @throws Error if exactly two candidates remain and the first
+     *  does not have exactly one more stack than the second */
+    public TStackListList getUniqueParse() {
+        TStackListList allLegalParses = new TStackListList(2); // save memory
+        TStackListList legalParsesWithVowelOnRoot = new TStackListList(1);
+        ParseIterator pi = getParseIterator();
+        while (pi.hasNext()) {
+            TStackList sl = pi.next();
+            BoolPair bpa = sl.isLegalTshegBar();
+            if (bpa.isLegal) {
+                if (bpa.isLegalAndHasAVowelOnRoot)
+                    legalParsesWithVowelOnRoot.add(sl);
+                allLegalParses.add(sl);
+            }
+        }
+        // Parses with a vowel on the root are preferred over the
+        // rest of the legal parses.
+        if (legalParsesWithVowelOnRoot.size() == 1)
+            return legalParsesWithVowelOnRoot;
+        else {
+            if (legalParsesWithVowelOnRoot.size() == 2) {
+                // DLC is this even valid?
+                if (legalParsesWithVowelOnRoot.get(0).size() != 1 + legalParsesWithVowelOnRoot.get(1).size())
+                    throw new Error("Something other than the G-YA vs. GYA case appeared.  Sorry for your trouble! " + legalParsesWithVowelOnRoot.get(0) + " ;; " + legalParsesWithVowelOnRoot.get(1));
+                return new TStackListList(legalParsesWithVowelOnRoot.get(1));
+            }
+            if (allLegalParses.size() == 2) {
+                // DLC is this even valid?
+                if (allLegalParses.get(0).size() != 1 + allLegalParses.get(1).size())
+                    throw new Error("Something other than the G-YA vs. GYA case appeared.  Sorry for your trouble! " + allLegalParses.get(0) + " ;; " + allLegalParses.get(1));
+                return new TStackListList(allLegalParses.get(1));
+            }
+            return allLegalParses;
+        }
+    }
+
+    /** Returns a human-readable representation. */
+    public String toString() {
+        return al.toString();
+    }
+
+    /** Returns true if and only if either x is a TParseTree object
+     *  representing the same TStackListLists in the same order or x
+     *  is a String that is equal to the result of {@link
+     *  #toString()}. */
+    public boolean equals(Object x) {
+        if (x instanceof TParseTree) {
+            return al.equals(((TParseTree)x).al);
+        } else if (x instanceof String) {
+            return toString().equals(x);
+        }
+        return false;
+    }
+
+    /** Returns a hashCode appropriate for use with our {@link
+     *  #equals(Object)} method. */
+    public int hashCode() { return al.hashCode(); }
+}
--- a/source/org/thdl/tib/text/ttt/TStackList.java
+++ b/source/org/thdl/tib/text/ttt/TStackList.java
@ -0,0 +1,176 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import org.thdl.tib.text.TibTextUtils;
+import org.thdl.tib.text.TGCList;
+
+import java.util.ArrayList;
+import java.util.ListIterator;
+
+/** An ordered list of {@link TPairList TPairLists}.  Each element
+ *  describes one stack (a grapheme cluster); the list as a whole
+ *  typically corresponds to one tsheg bar.
+ *
+ *  @author David Chandler */
+class TStackList {
+    /** FIXME: change me and see if performance improves. */
+    private static final int INITIAL_SIZE = 1;
+
+    /** a fast, non-thread-safe, random-access list implementation;
+     *  assigned exactly once, by whichever constructor runs: */
+    private final ArrayList al;
+
+    /** Creates an empty list. */
+    public TStackList() { al = new ArrayList(INITIAL_SIZE); }
+
+    /** Creates a list containing just p. */
+    public TStackList(TPairList p) {
+        al = new ArrayList(1);
+        add(p);
+    }
+
+    /** Creates an empty list with the capacity to hold N items. */
+    public TStackList(int N) {
+        al = new ArrayList(N);
+    }
+
+    /** Returns the ith TPairList (i.e., the ith stack) in this list.
+     *  The index is zero-based. */
+    public TPairList get(int i) { return (TPairList)al.get(i); }
+
+    /** Adds p to the end of this list. */
+    public void add(TPairList p) { al.add(p); }
+
+    /** Adds all the stacks in c to the end of this list. */
+    public void addAll(TStackList c) { al.addAll(c.al); }
+
+    /** Adds all the stacks in c to this list, inserting them at
+     *  position k. */
+    public void addAll(int k, TStackList c) { al.addAll(k, c.al); }
+
+    /** Returns the number of TPairLists in this list. */
+    public int size() { return al.size(); }
+
+    /** Returns true if and only if this list is empty. */
+    public boolean isEmpty() { return al.isEmpty(); }
+
+    /** Returns a human-readable representation like {G}{YA} or
+     *  {GYA}: each stack's recovered ACIP, wrapped in braces, in
+     *  list order. */
+    public String toString() {
+        int sz = size();
+        StringBuffer b = new StringBuffer();
+        for (int i = 0; i < sz; i++) {
+            b.append('{');
+            b.append(get(i).recoverACIP());
+            b.append('}');
+        }
+        return b.toString();
+    }
+
+    /** Returns a human-readable representation.
+     *  @return something like [[(R . ), (D . O)], [(R . ), (J . E)]] */
+    public String toString2() {
+        return al.toString();
+    }
+
+    /** Returns true if and only if either x is an TStackList
+     *  object representing the same TPairLists in the same
+     *  order or x is a String that is equal to the result of {@link
+     *  #toString()} or of {@link #toString2()}.  The String case is
+     *  one-directional: <code>String.equals</code> never returns true
+     *  for a TStackList. */
+    public boolean equals(Object x) {
+        if (x instanceof TStackList) {
+            return al.equals(((TStackList)x).al);
+        } else if (x instanceof String) {
+            return toString().equals(x) || toString2().equals(x);
+        }
+        return false;
+    }
+
+    /** Returns the hash code of the backing list, which is the list
+     *  that {@link #equals(Object)} compares when given another
+     *  TStackList. */
+    public int hashCode() { return al.hashCode(); }
+
+    /** Returns an iterator for this list. Mutate this list while
+     *  iterating and you'll have to read the code to know what will
+     *  happen. */
+    public ListIterator listIterator() { return al.listIterator(); }
+
+    /** Returns a pair with {@link BoolPair#isLegal} true if and only
+     *  if this list of stacks is a legal tsheg bar by the rules of
+     *  Tibetan syntax (sometimes called rules of spelling).  If this
+     *  is legal, then {@link BoolPair#isLegalAndHasAVowelOnRoot} will
+     *  be true if and only if there is an explicit {A} vowel on the
+     *  root stack.  Concretely, the vowel test looks at the last
+     *  TPair of each candidate root stack and checks whether its
+     *  right-hand side starts with "A".
+     *  @return a new BoolPair holding the two answers */
+    public BoolPair isLegalTshegBar() {
+        // DLC handle PADMA and other Tibetanized Sanskrit fellows.  Right now we only handle single-stack guys.
+
+        TTGCList tgcList = new TTGCList(this);
+        StringBuffer warnings = new StringBuffer();
+        String candidateType
+            = TibTextUtils.getClassificationOfTshegBar(tgcList, warnings);
+
+        // Compare by value, not by reference: an == or != test against
+        // the literal is only right if the classifier hands back the
+        // very same interned String object.  The cheap classification
+        // test runs first; isClearlyIllegal() is consulted only for
+        // candidates that survive it.
+        boolean isLegal
+            = !"invalid".equals(candidateType) && !isClearlyIllegal();
+
+        boolean isLegalAndHasAVowelOnRoot = false;
+        if (isLegal) {
+            // NOTE(review): presumably a two-element array in which a
+            // negative entry means "no root candidate here" -- confirm
+            // against TibTextUtils.
+            int rootIndices[]
+                = TibTextUtils.getIndicesOfRootForCandidateType(candidateType);
+            for (int i = 0; i < 2; i++) {
+                if (rootIndices[i] >= 0) {
+                    // Map the grapheme-cluster index back to the stack
+                    // that produced it.
+                    int pairListIndex = tgcList.getTPairListIndex(rootIndices[i]);
+                    TPairList pl = get(pairListIndex);
+                    TPair p = pl.get(pl.size() - 1);
+                    isLegalAndHasAVowelOnRoot
+                        = (p.getRight() != null && p.getRight().startsWith("A")); // could be {A:}, e.g.
+                    if (isLegalAndHasAVowelOnRoot)
+                        break;
+                }
+            }
+        }
+        return new BoolPair(isLegal, isLegalAndHasAVowelOnRoot);
+    }
+
+    /** Returns true if and only if this stack list contains a clearly
+     *  illegal construct, such as an TPair (V . something).  A stack
+     *  counts as clearly illegal when its getACIPError() is non-null;
+     *  the first such error is also printed to standard output. */
+    boolean isClearlyIllegal() {
+        // check for {D}{VA} sorts of things:
+        for (int i = 0; i < size(); i++) {
+            if (get(i).getACIPError() != null) {
+                System.out.println("DLC: error is " + get(i).getACIPError());
+                return true;
+            }
+        }
+        return false;
+    }
+}
+
+/** A plain holder for two boolean answers that are computed
+ *  together.  Both fields are set once by the constructor and are
+ *  readable by any class in this package. */
+class BoolPair {
+    /** the first answer */
+    boolean isLegal;
+    /** the second answer */
+    boolean isLegalAndHasAVowelOnRoot;
+
+    /** Creates a pair holding the two given values. */
+    BoolPair(boolean isLegal, boolean isLegalAndHasAVowelOnRoot) {
+        this.isLegalAndHasAVowelOnRoot = isLegalAndHasAVowelOnRoot;
+        this.isLegal = isLegal;
+    }
+}
--- a/source/org/thdl/tib/text/ttt/TStackListList.java
+++ b/source/org/thdl/tib/text/ttt/TStackListList.java
@ -0,0 +1,86 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import java.util.ArrayList;
+import java.util.ListIterator;
+
+/** An ordered list of {@link TStackList} objects.  NOTE(review): it
+ *  appears that each element is one alternative reading of an
+ *  ambiguous section of a tsheg bar (e.g. {G}{YA} versus {GYA}) --
+ *  confirm against the callers.
+ *
+ *  @author David Chandler */
+class TStackListList {
+    /** a fast, non-thread-safe, random-access list implementation;
+     *  assigned exactly once, by whichever constructor runs: */
+    private final ArrayList al;
+
+    /** Creates an empty list. */
+    public TStackListList() { al = new ArrayList(); }
+
+    /** Creates a list containing just p. */
+    public TStackListList(TStackList p) {
+        al = new ArrayList(1);
+        add(p);
+    }
+
+    /** Creates an empty list with the capacity to hold N items. */
+    public TStackListList(int N) {
+        al = new ArrayList(N);
+    }
+
+    /** Returns the ith TStackList in this list.  The index is
+     *  zero-based. */
+    public TStackList get(int i) { return (TStackList)al.get(i); }
+
+    /** Adds p to the end of this list. */
+    public void add(TStackList p) { al.add(p); }
+
+    /** Returns the number of TStackList objects in this list. */
+    public int size() { return al.size(); }
+
+    /** Returns true if and only if this list is empty. */
+    public boolean isEmpty() { return al.isEmpty(); }
+
+    /** Returns a human-readable representation.
+     *  @return something like [[[(R . ), (D . O)], [(R . ), (J . E)]]] */
+    public String toString() {
+        return al.toString();
+    }
+
+    /** Returns true if and only if either x is an TStackListList
+     *  object representing the same TStackList objects in the same
+     *  order or x is a String that is equal to the result of {@link
+     *  #toString()}.  The String case is one-directional:
+     *  <code>String.equals</code> never returns true for a
+     *  TStackListList. */
+    public boolean equals(Object x) {
+        if (x instanceof TStackListList) {
+            return al.equals(((TStackListList)x).al);
+        } else if (x instanceof String) {
+            return toString().equals(x);
+        }
+        return false;
+    }
+
+    /** Returns the hash code of the backing list, which is the list
+     *  that {@link #equals(Object)} compares when given another
+     *  TStackListList. */
+    public int hashCode() { return al.hashCode(); }
+
+    /** Returns an iterator for this list. Mutate this list while
+     *  iterating and you'll have to read the code to know what will
+     *  happen. */
+    public ListIterator listIterator() { return al.listIterator(); }
+}
--- a/source/org/thdl/tib/text/ttt/TTGCList.java
+++ b/source/org/thdl/tib/text/ttt/TTGCList.java
@ -0,0 +1,63 @@
+/*
+The contents of this file are subject to the THDL Open Community License
+Version 1.0 (the "License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License on the THDL web site 
+(http://www.thdl.org/).
+
+Software distributed under the License is distributed on an "AS IS" basis, 
+WITHOUT WARRANTY OF ANY KIND, either express or implied. See the 
+License for the specific terms governing rights and limitations under the 
+License. 
+
+The Initial Developer of this software is the Tibetan and Himalayan Digital
+Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
+All Rights Reserved. 
+
+Contributor(s): ______________________________________.
+*/
+
+package org.thdl.tib.text.ttt;
+
+import org.thdl.tib.text.TGCList;
+import org.thdl.tib.text.TGCPair;
+
+import java.util.ArrayList;
+
+/** A {@link TGCList} of grapheme clusters built from a {@link
+ *  TStackList}.  For every cluster it also records which stack of
+ *  that TStackList the cluster came from.
+ *
+ *  @author David Chandler */
+class TTGCList implements TGCList {
+    // Two lists are kept side by side here; a single list of ordered
+    // pairs (TGCPair, int) would also have worked.
+    private ArrayList al;
+    private ArrayList stackIndices;
+
+    /** Don't use this. */
+    private TTGCList() { }
+
+    /** Creates a TGCList from sl by asking each of sl's stacks, in
+     *  order, to populate this list with its TGCPairs. */
+    public TTGCList(TStackList sl) {
+        al = new ArrayList();
+        stackIndices = new ArrayList();
+        final int numStacks = sl.size();
+        int stackIndex = 0;
+        while (stackIndex < numStacks) {
+            TPairList stack = sl.get(stackIndex);
+            stack.populateWithTGCPairs(al, stackIndices, stackIndex);
+            stackIndex++;
+        }
+    }
+
+    /** Returns the TGCPair stored at zero-based position i. */
+    public TGCPair get(int i) {
+        Object element = al.get(i);
+        return (TGCPair)element;
+    }
+
+    /** Returns how many TGCPairs this list holds. */
+    public int size() {
+        return al.size();
+    }
+
+    /** Returns the zero-based index, within the stack list this
+     *  object was constructed from, of the TPairList that gave rise
+     *  to the TGCPair at index tgcPairIndex. */
+    public int getTPairListIndex(int tgcPairIndex) {
+        Integer boxedIndex = (Integer)stackIndices.get(tgcPairIndex);
+        return boxedIndex.intValue();
+    }
+}
--- a/source/org/thdl/tib/text/ttt/package.html
+++ b/source/org/thdl/tib/text/ttt/package.html
@ -0,0 +1,31 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
+<html>
+<head>
+<!--
+
+  @(#)package.html
+
+  Copyright 2003 Tibetan and Himalayan Digital Library
+
+  This software is the confidential and proprietary information of
+  the Tibetan and Himalayan Digital Library. You shall use such
+  information only in accordance with the terms of the license
+  agreement you entered into with the THDL.
+
+-->
+</head>
+<body bgcolor="white">
+
+Provides classes and methods for converting Latin transliteration of
+Tibetan text into Tibetan.
+<p>
+This package (whose name, ttt, stands for transliteration-to-Tibetan)
+contains methods for converting ACIP transliteration into Tibetan
+Machine Web and methods for converting EWTS transliteration into
+Tibetan Machine Web.&nbsp; It has an extensive test suite, though the
+tests are probably not described in this Javadoc documentation.
+</p>
+<h2>Related Documentation</h2>
+@see <a href="../package-summary.html">org.thdl.tib.text</a>
+</body>
+</html>