// Jskad/source/org/thdl/tib/text/ttt/TPairList.java

/*
The contents of this file are subject to the THDL Open Community License
Version 1.0 (the "License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License on the THDL web site
(http://www.thdl.org/).

Software distributed under the License is distributed on an "AS IS" basis,
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
License for the specific terms governing rights and limitations under the
License.

The Initial Developer of this software is the Tibetan and Himalayan Digital
Library (THDL). Portions created by the THDL are Copyright 2003 THDL.
All Rights Reserved.

Contributor(s): ______________________________________.
*/

// TODO(DLC)[EWTS->Tibetan]: a (DLC: does this become (a.) or (.a)?), ug pha, g.a, aM, etc. -- test!

package org.thdl.tib.text.ttt;

import java.util.ArrayList;
import java.util.HashMap;

import org.thdl.tib.text.TGCPair;
import org.thdl.tib.text.TibetanMachineWeb;
import org.thdl.util.ThdlDebug;

/** A list of {@link TPair TPairs}, typically corresponding to
 *  one tsheg bar.  <i>l</i>' in the design doc is a TPairList.
 *
 *  @author David Chandler */
class TPairList {
    /** the part that knows ACIP from EWTS; every
     *  transliteration-specific question asked in this class
     *  (disambiguator, a-vowel, consonant tests, ...) goes through
     *  it */
    private TTraits traits;

    /** Initial capacity given to the backing ArrayList by {@link
     *  #TPairList(TTraits)}.  FIXME: change me and see if performance
     *  improves. */
    private static final int INITIAL_SIZE = 1;

    /** a fast, non-thread-safe, random-access list implementation;
     *  its elements are the TPairs of this list, in order: */
    private ArrayList al;

    /** Builds a one-element list whose only member is p.  The list
     *  adopts p's traits, and p is subject to the same validation as
     *  in {@link #add(TPair)}. */
    public TPairList(TPair p) {
        this(p.getTraits(), 1);
        add(p);
    }

    /** Builds a list with no pairs in it that will consult traits
     *  for all transliteration-specific decisions. */
    public TPairList(TTraits traits) {
        this(traits, INITIAL_SIZE);
    }

    /** Builds a list with no pairs in it whose backing storage is
     *  presized for N pairs.  N is only a capacity hint passed to
     *  the underlying ArrayList; the list still grows as needed. */
    public TPairList(TTraits traits, int N) {
        al = new ArrayList(N);
        this.traits = traits;
    }

    /** Returns the pair stored at (zero-based) position i. */
    public TPair get(int i) {
        Object element = al.get(i);
        return (TPair)element;
    }

    /** Returns the nth (counting from zero) pair of this list for
     *  which {@link TPair#isDisambiguator()} is false.  This is
     *  O(size()).
     *  @throws IllegalArgumentException if no such pair exists */
    public TPair getNthNonDisambiguatorPair(int n) {
        int seen = 0; // non-disambiguator pairs passed over so far
        for (int idx = 0; idx < size(); idx++) {
            TPair candidate = get(idx);
            if (candidate.isDisambiguator())
                continue;
            if (seen == n)
                return candidate;
            seen++;
        }
        throw new IllegalArgumentException("n, " + n + " is too big for this list of pairs, " + toString());
    }

    /** Returns how many pairs in this list are not entirely
     *  disambiguators, i.e. size() less the number of pairs for
     *  which {@link TPair#isDisambiguator()} holds. */
    public int sizeMinusDisambiguators() {
        final int total = size();
        int disambiguators = 0;
        for (int idx = 0; idx < total; idx++) {
            if (get(idx).isDisambiguator())
                disambiguators++;
        }
        return total - disambiguators;
    }

    /** Appends p to this list after validating it.
     *  @throws IllegalArgumentException if p is null or if both its
     *  left and right sides are null */
    public void add(TPair p) {
        boolean weird
            = (p == null) || (p.getLeft() == null && p.getRight() == null);
        if (weird)
            throw new IllegalArgumentException("p is weird");
        al.add(p);
    }

    /** Inserts p at the front of this list, shifting every existing
     *  pair one position later.  No validation of p is done here. */
    public void prepend(TPair p) {
        final int front = 0;
        al.add(front, p);
    }

    /** Puts p after the last pair of this list.  Unlike {@link
     *  #add(TPair)}, this performs no validation of p. */
    public void append(TPair p) {
        al.add(al.size(), p);
    }

    /** Returns how many TPairs this list holds, disambiguator pairs
     *  included. */
    public int size() {
        int count = al.size();
        return count;
    }

    /** Returns the backing list's own string form, which shows each
     *  pair as rendered by its toString().
     *  @return something like [(R . ), (D . O)] */
    public String toString2() {
        String rendered = al.toString();
        return rendered;
    }

    /** Returns a human-readable representation like {G}{YA} or
     *  {G-}{YA}: one pair of braces per TPair, enclosing the pair's
     *  left side followed by its right side, with null sides
     *  omitted. */
    public String toString() {
        StringBuffer out = new StringBuffer();
        for (int idx = 0, count = size(); idx < count; idx++) {
            TPair pair = get(idx);
            String left = pair.getLeft();
            String right = pair.getRight();
            out.append('{');
            if (left != null)
                out.append(left);
            if (right != null)
                out.append(right);
            out.append('}');
        }
        return out.toString();
    }

    /** Reassembles the transliteration for this TPairList by
     *  concatenating each pair's left side and right side (skipping
     *  nulls) in list order.  It will be as ambiguous as the input.
     *  It may have more disambiguators than the original, such as in
     *  the case of the ACIP {1234}. */
    String recoverACIP() {
        final int count = size();
        StringBuffer rebuilt = new StringBuffer();
        int idx = 0;
        while (idx < count) {
            TPair pair = get(idx++);
            String left = pair.getLeft();
            if (left != null)
                rebuilt.append(left);
            String right = pair.getRight();
            if (right != null)
                rebuilt.append(right);
        }
        return rebuilt.toString();
    }

    /** Returns true if the traits report a simple error for at least
     *  one pair of this list.  The errors meant are ( . <vowel>) and
     *  (A . ), the two simple errors you encounter if you interpret
     *  DAA or TAA or DAI or DAE the wrong way.
     *  TODO(DLC)[EWTS->Tibetan]: ACIP vs. EWTS */
    boolean hasSimpleError() {
        boolean found = false;
        final int count = size();
        // Scan front to back and stop at the first offending pair.
        for (int idx = 0; !found && idx < count; idx++) {
            found = traits.hasSimpleError(get(idx));
        }
        return found;
    }

    /** Finds errors so simple that they can be detected without using
     *  the rules of Tibetan spelling (i.e., tsheg bar syntax).
     *  Returns an error message, or null if there is no error that
     *  you can find without the help of tsheg bar syntax rules.
     *
     *  <p>Only the first problem found is reported.  The checks, in
     *  the order they are made, are: an empty list (message 122); a
     *  pair whose numeric-ness differs from that of the first pair
     *  (123); for ACIP only, a "V" on the left that either starts the
     *  list or follows a pair whose right side is neither null nor
     *  "+" (124); the a-vowel on the left with a null or empty right
     *  side (125); a left side that is missing or is not a consonant
     *  (128 and 129, subject to the exceptions coded below); a
     *  trailing "+" (126); and a trailing disambiguator (127).
     *
     *  @param originalACIP the transliteration to quote in the
     *  message; if null, {@link #recoverACIP()} is used instead
     *  (except for message 122, which then quotes the empty string)
     *  @param shortMessages true to get the short form of the message
     *  @return the message, or null if none of the checks fails */
    // FIXME: This is needlessly ACIP specific -- rename and change text of messages
    String getACIPError(String originalACIP, boolean shortMessages) { // TODO(DLC)[EWTS->Tibetan] misnomer.
        // FIXME: this returns just the first error.  List all errors
        // at once.
        int sz = size();
        if (0 == sz) // FIXME: see if you can make this happen...
            return ErrorsAndWarnings.getMessage(122, shortMessages,
                                                ((null != originalACIP)
                                                 ? originalACIP
                                                 : ""),
                                                traits);
        String translit
            = (null != originalACIP) ? originalACIP : recoverACIP();
        // The first pair decides whether every pair has to be numeric
        // or every pair has to be non-numeric.
        boolean mustBeEntirelyNumeric = get(0).isNumeric();
        for (int i = 0; i < sz; i++) {
            TPair p = get(i);
            if (mustBeEntirelyNumeric != p.isNumeric())
                return ErrorsAndWarnings.getMessage(123, shortMessages, translit, traits);

            if (traits.isACIP()
                && ((i == 0 && "V".equals(p.getLeft()))
                    || (i > 0 && "V".equals(p.getLeft())
                        && (null != get(i - 1).getRight()
                            && !"+".equals(get(i - 1).getRight()))))) {
                // ACIP "V" is acceptable here only when the previous
                // pair's right side is null or "+".
                return ErrorsAndWarnings.getMessage(124, shortMessages, translit, traits);
            } else if (traits.aVowel().equals(p.getLeft())
                       && (null == p.getRight()
                           || "".equals(p.getRight()))) {
                return ErrorsAndWarnings.getMessage(125, shortMessages, translit, traits);
            } else if (null != p.getRight()
                       && !"+".equals(p.getRight())
                       && !traits.disambiguator().equals(p.getRight())
                       && !traits.isWowel(p.getRight())
                       && false /* TODO(DLC)[EWTS->Tibetan]: think about this harder. */) {
                // The trailing "&& false" disables this branch, so
                // the message below is never returned at present.
            	return "ErrorNumberDLC1: We don't yet support stacking vowels, convert {" + translit + "} manually.";
                // TODO(DLC)[EWTS->Tibetan]: test, i think we do support it
            } else if ((null == p.getLeft()
                        && (!traits.disambiguator().equals(p.getRight())
                        	&& (!traits.vowelAloneImpliesAChen()
                        		|| !traits.aVowel().equals(p.getRight()))))
                       || (null != p.getLeft()
                           && (!traits.isConsonant(p.getLeft()) && (!traits.vowelAloneImpliesAChen() || !traits.aVowel().equals(p.getLeft())))
                           && !p.isNumeric())) {
                // Reached when the left side is null and the right
                // side is neither the disambiguator nor (where a
                // vowel alone implies a-chen) the a-vowel, or when
                // the left side is non-null but is not a consonant,
                // not the a-vowel in a vowel-alone-implies-a-chen
                // transliteration, and not numeric.
                // FIXME: stop handling this outside of ErrorsAndWarnings:
                if (null == p.getLeft()) {
                    if (shortMessages)
                        return "128: {" + translit + "}";
                    else
                        return "128: Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because " + p.getRight() + " is a \"vowel\" without an associated consonant.";
                } else {
                    if (shortMessages)
                        return "129: {" + translit + "}";
                    else
                        return "129: Cannot convert " + traits.shortTranslitName() + " {" + translit + "} because " + p.getLeft() + " is not an " + traits.shortTranslitName() + " consonant.";
                }
            }
        }
        // A stacking operator with nothing after it to stack:
        if ("+".equals(get(sz - 1).getRight())) {
            return ErrorsAndWarnings.getMessage(126, shortMessages, translit, traits);
        }
        // FIXME: really this is a warning, not an error:
        if (traits.disambiguator().equals(get(sz - 1).getRight())) {
            return ErrorsAndWarnings.getMessage(127, shortMessages, translit, traits);
        }
        return null;
    }

    /** Returns true if and only if either x is a TPairList whose
     *  pairs are equal to this list's pairs, in the same order, or x
     *  is a String that equals the result of {@link #toString()} or
     *  of {@link #toString2()}. */
    public boolean equals(Object x) {
        if (x instanceof String) {
            String mine = toString();
            return mine.equals(x) || toString2().equals(x);
        }
        if (!(x instanceof TPairList)) {
            return false;
        }
        TPairList other = (TPairList)x;
        return al.equals(other.al);
    }

    /** Returns true if and only if this list holds no pairs at
     *  all. */
    public boolean isEmpty() {
        return al.size() == 0;
    }

    /** Returns the hash code of the backing list of pairs, so that
     *  two TPairLists that are equal according to {@link
     *  #equals(Object)} hash alike. */
    public int hashCode() {
        int pairsHash = al.hashCode();
        return pairsHash;
    }

    // Stacking-decision codes.  NOTE(review): none of these four
    // constants is referenced by any method of this class; they look
    // like leftovers -- confirm before removing.
    private static final int STOP_STACK = 0;
    private static final int KEEP_STACKING = 1;
    private static final int ALWAYS_KEEP_STACKING = 2;
    private static final int ALWAYS_STOP_STACKING = 3;

    /** Returns a parse tree (a TParseTree) holding all possible
     *  TStackLists for this list of pairs.  Uses knowledge of Tibetan
     *  spelling rules (i.e., tsheg bar syntax) to do so.  If this
     *  list of pairs has something clearly illegal in it, or is
     *  empty, or is merely a list of disambiguators etc., then this
     *  returns null.  Never returns an empty parse tree.
     *
     *  <p>The tree gets one TStackListList per stretch of pairs that
     *  runs from one hard break to the next, and each TStackListList
     *  holds one TStackList per combination of the optional breaks
     *  that fall inside that stretch, minus the combinations that the
     *  traits reject as invalid stacks.
     *
     *  @return the parse tree, or null as described above
     */
    public TParseTree getParseTree() {
        // TODO(DLC)[EWTS->Tibetan]: EWTS NOTE: this is still useful for EWTS: In EWTS, bkra
        // is b.k+ra, smra is s+m+ra, and tshmra is invalid.

        // We treat [(B . ), (G . +), (K . ), (T . A)] as if it could
        // be {B+G+K+T} or {B}{G+K+T}; we handle prefixes specially
        // this way.  [(T . ), (G . +), (K . ), (T . A)] is clearly
        // {T+G+K+TA}
        //
        // We don't care if T+G+K+T is in TMW or not -- there is no
        // master list of stacks.

        int sz = size();
        // One clearly illegal pair anywhere spoils the whole list.
        for (int i = 0; i < sz; i++)
            if (traits.isClearlyIllegal(get(i)))
                return null;

        // An empty list has no parse tree; this also protects the
        // get(0) calls below.
        if (sz < 1) return null;

        // When we see a stretch of ACIP without a disambiguator or a
        // vowel, that stretch is taken to be one stack unless it may
        // be prefix-root or suffix-postsuffix or suffix/postsuffix-'
        // -- the latter necessary because GAMS'I is GAM-S-'I, not
        // GAM-S+'I.  'UR, 'US, 'ANG, 'AM, 'I, 'O, 'U -- all begin
        // with '.  So we can have zero, one, two, or three special
        // break locations.  (The kind that aren't special are the
        // break after G in G-DAMS, or the break after G in GADAMS or
        // GEDAMS.)
        //
        // If a nonnegative number appears in breakLocations[i], it
        // means that pair i may or may not be stacked with pair i+1.
        //
        // NOTE(review): nothing below checks nextBreakLoc against the
        // three slots of breakLocations before storing into it.
        // Whether a fourth optional break can ever be found depends
        // on what isPrefix(), isSuffix(), and isPostSuffix() accept
        // -- confirm that an ArrayIndexOutOfBoundsException is
        // impossible here.
        int nextBreakLoc = 0;
        int breakLocations[] = { -1, -1, -1 };

        boolean mayHavePrefix;

        // Handle the first pair specially -- it could be a prefix.
        if (ddebug) System.out.println("i is " + 0);
        if ((mayHavePrefix = get(0).isPrefix())
            && sz > 1
            && null == get(0).getRight()) {
            // special case: we must have a branch in the parse tree
            // for the initial part of this pair list.  For example,
            // is DKHYA D+KH+YA or D-KH+YA?  It depends on prefix
            // rules (can KH+YA take a DA prefix?), so the parse tree
            // includes both.
            breakLocations[nextBreakLoc++] = 0;
        }

        // stack numbers start at 1.
        int stackNumber = (get(0).endsACIPStack()) ? 2 : 1;
        // this starts at 0.
        int stackStart = (get(0).endsACIPStack()) ? 1 : 0;

        // Tri-state: 1 means the pairs seen so far are numeric, -1
        // means they are not, and 0 means only disambiguators have
        // been seen so far, so it is still undecided.
        int numeric = get(0).isNumeric() ? 1 : (get(0).isDisambiguator() ? 0 : -1);

        // First pass: reject stacked or mixed numbers and record the
        // optional break locations.
        for (int i = 1; i < sz; i++) {
            if (ddebug) System.out.println("i is " + i);
            TPair p = get(i);

            // GA-YOGS should be treated like GAYOGS or G-YOGS:
            if (p.isDisambiguator()) continue;

            boolean nn;
            if ((nn = p.isNumeric()) && ("+".equals(get(i-1).getRight())
                                         || "+".equals(p.getRight())))
                return null; // clearly illegal.  You can't stack numbers.
            if (nn) {
                if (-1 == numeric)
                    return null; // you can't mix numbers and letters.
                else if (0 == numeric)
                    numeric = 1;
            } else if (!p.isDisambiguator()) {
                // (The test above always holds here, because
                // disambiguator pairs were skipped via continue.)
                if (numeric == 1)
                    return null; // you can't mix numbers and letters.
                else if (0 == numeric)
                    numeric = -1;
            }

            // Pair i closes a stack if it is the last pair or if it
            // ends an ACIP stack.
            if (i+1==sz || p.endsACIPStack()) {
                if (/* the stack ending here might really be
                       suffix-postsuffix or
                       suffix-appendage or
                       suffix-postsuffix-appendage */
                    (mayHavePrefix && (stackNumber == 2 || stackNumber == 3))
                    || (!mayHavePrefix && (stackNumber == 2))) {
                    if (i > stackStart) {
                        if (get(stackStart).isSuffix()
                            && (get(stackStart+1).isPostSuffix() // suffix-postsuffix
                                || "'".equals(get(stackStart+1).getLeft()))) // suffix-appendage
                            breakLocations[nextBreakLoc++] = stackStart;
                        if (i > stackStart + 1) {
                            // three to play with, maybe it's
                            // suffix-postsuffix-appendage.
                            if (get(stackStart).isSuffix()
                                && get(stackStart+1).isPostSuffix()
                                && "'".equals(get(stackStart+2).getLeft()))
                                breakLocations[nextBreakLoc++] = stackStart+1;
                        }
                    }
                    // else no need to insert a breakLocation, we're
                    // breaking hard.
                }
                if (/* the stack ending here might really be
                       postsuffix-appendage (e.g., GDAM-S'O) */
                    (mayHavePrefix && (stackNumber == 3 || stackNumber == 4))
                    || (!mayHavePrefix && (stackNumber == 3))) {
                    if (i == stackStart+1) { // because GDAM--S'O is illegal, and because it's 'ANG, not 'NG, 'AM, not 'M -- ' always ends the stack
                        if (get(stackStart).isPostSuffix()
                            && "'".equals(get(stackStart+1).getLeft()))
                            breakLocations[nextBreakLoc++] = stackStart;
                    }
                }
                ++stackNumber;
                stackStart = i+1;
            }
        }
        // FIXME: we no longer need all these breakLocations -- we can handle SAM'AM'ANG without them.

        // Now go from hard break (i.e., (* . VOWEL or -)) to hard
        // break (and there's a hard break after the last pair, of
        // course, even if it is (G . ) or (G . +) [the latter being
        // hideously illegal]).  Between the hard breaks, there will
        // be 1, 2, or 4 (can you see why 8 isn't possible, though
        // numBreaks can be 3?) possible parses.  There are two of DGA
        // in DGAMS'O -- D-GA and D+GA.  There are 4 of MS'O in
        // DGAMS'O -- M-S-'O, M-S+'O, M+S-'O, and M+S+'O.  Add one
        // TStackListList per hard break to pt, the parse tree.
        int startLoc = 0; // which pair starts this hard break?

        // FIXME: assert this
        // Sanity check: the recorded break locations must be strictly
        // increasing (unused slots keep their initial -1).
        if ((breakLocations[1] >= 0 && breakLocations[1] <= breakLocations[0])
            || (breakLocations[2] >= 0 && breakLocations[2] <= breakLocations[1]))
            throw new Error("breakLocations is monotonically increasing, ain't it?");
        TParseTree pt = new TParseTree();
        // Second pass: build the tree, one stretch [startLoc, i] at a
        // time.
        for (int i = 0; i < sz; i++) {
            if (i+1 == sz || get(i).endsACIPStack()) {
                TStackListList sll = new TStackListList(4); // maximum is 4.

                // Find the optional breaks lying inside this stretch:
                // numBreaks of them, the first being
                // breakLocations[breakStart].
                int numBreaks = 0;
                int breakStart = -1;
                for (int jj = 0; jj < breakLocations.length; jj++) {
                    if (breakLocations[jj] >= startLoc
                        && breakLocations[jj] <= i) {
                        if (breakStart < 0)
                            breakStart = jj;
                        ++numBreaks;
                    }
                }

                // Count from [0, 1<<numBreaks).  At each point,
                // counter equals b2b1b0 in binary.  1<<numBreaks is
                // the number of stack lists there are in this stack
                // list list of the parse tree.  Break at location
                // breakLocations[breakStart+0] if and only if b0 is
                // one, at location breakLocations[breakStart+1] if
                // and only if b1 is one, etc.
                for (int counter = 0; counter < (1<<numBreaks); counter++) {
                    TStackList sl = new TStackList();
                    boolean slIsInvalid = false;
                    // currentStack collects the insideStack() form of
                    // each pair and is what gets added to sl;
                    // currentStackUnmodified collects the pairs as
                    // they are and is what the traits validate.
                    TPairList currentStack = new TPairList(traits);
                    TPairList currentStackUnmodified = new TPairList(traits);
                    for (int k = startLoc; k <= i; k++) {
                        if (!get(k).isDisambiguator()) {
                            if (get(k).isNumeric()
                                || (get(k).getLeft() != null
                                    && (traits.isConsonant(get(k).getLeft())
                                        || traits.vowelAloneImpliesAChen() && traits.aVowel().equals(get(k).getLeft())))) {
                                currentStack.add(get(k).insideStack());
                                currentStackUnmodified.add(get(k));
                            } else {
                                return null; // sA, for example, is illegal.
                            }
                        }
                        if (k == i || get(k).endsACIPStack()) {
                            // Hard end of a stack: flush what has
                            // been collected.
                            if (!currentStack.isEmpty()) {
                                if (traits.couldBeValidStack(currentStackUnmodified)) {
                                    sl.add(currentStack.asStack());
                                } else {
                                    slIsInvalid = true;
                                    break;
                                }
                            }
                            currentStack = new TPairList(traits);
                            currentStackUnmodified = new TPairList(traits);
                        } else {
                            if (numBreaks > 0) {
                                // The bound 3 is the length of
                                // breakLocations.
                                for (int j = 0; breakStart+j < 3; j++) {
                                    if (k == breakLocations[breakStart+j]
                                        && 1 == ((counter >> j) & 1)) {
                                        // An optional break that this
                                        // value of counter turns on:
                                        // flush here too.
                                        if (!currentStack.isEmpty()) {
                                            if (traits.couldBeValidStack(currentStackUnmodified)) {
                                                sl.add(currentStack.asStack());
                                            } else {
                                                slIsInvalid = true;
                                                // This leaves only the
                                                // j loop; the k loop
                                                // keeps running, but sl
                                                // is thrown away below
                                                // because slIsInvalid
                                                // is set.
                                                break;
                                            }
                                        }
                                        currentStack = new TPairList(traits);
                                        currentStackUnmodified = new TPairList(traits);
                                        break; // shouldn't matter, but you never know
                                    }
                                }
                            }
                        }
                    }
                    // Keep only the alternatives that are valid and
                    // nonempty.
                    if (!slIsInvalid && !sl.isEmpty()) {
                        sll.add(sl);
                    }
                }

                if (!sll.isEmpty())
                    pt.add(sll);
                startLoc = i+1;
            }
        }


        if (pt.isEmpty()) return null;
        return pt;
    }

    /** Compile-time switch for the System.out tracing found in
     *  getParseTree() and populateWithTGCPairs(..); off by
     *  default. */
    private static final boolean ddebug = false;

    /** Mutates this TPairList object such that the last pair is
     *  empty or is a vowel on the right, but is never the stacking
     *  operator ('+') or a disambiguator (i.e., a '-' on the right).
     *  In either of those two cases the last pair is replaced by a
     *  new pair with the same left side and a null right side.  An
     *  empty list is left alone.
     *  @return this instance */
    private TPairList asStack() {
        if (isEmpty())
            return this;
        final int lastIndex = size() - 1;
        TPair tail = get(lastIndex);
        String tailRight = tail.getRight();
        if ("+".equals(tailRight)
            || traits.disambiguator().equals(tailRight)) {
            al.set(lastIndex, new TPair(traits, tail.getLeft(), null));
        }
        return this;
    }

    /** Adds the TGCPairs corresponding to this list to the end of
     *  pl.  Some TPairs correspond to more than one TGCPair
     *  ({AA:}); some TGCPairs correspond to more than one TPair
     *  ({G+YA}).  To keep track, indexList is appended to in
     *  lockstep with pl: index (wrapped as an {@link
     *  java.lang.Integer}) is appended to indexList once each time
     *  we append to pl.  This assumes that this TPairList
     *  corresponds to exactly one Tibetan grapheme cluster (i.e.,
     *  stack).  Note that U+0F7F (ACIP {:}) is part of a stack, not a
     *  stack all on its own.  An empty list appends nothing.
     *  @param pl the list of TGCPairs to append to
     *  @param indexList the list of Integers to append to in
     *  lockstep with pl
     *  @param index the value recorded in indexList for each TGCPair
     *  appended
     *  @throws Error if the last pair's left side is neither a
     *  consonant known to the traits nor numeric */
    void populateWithTGCPairs(ArrayList pl,
                              ArrayList indexList, int index) {
        final int count = size();
        if (count == 0)
            return;

        // Gather the EWTS of every pair before the last one, noting
        // on the way whether any of them is numeric.
        final int lastIndex = count - 1;
        boolean sawNumber = false;
        StringBuffer stackWylie = new StringBuffer();
        for (int k = 0; k < lastIndex; k++) {
            stackWylie.append(get(k).getWylie());
            if (get(k).isNumeric())
                sawNumber = true;
        }

        // The last pair carries the vowel, if there is one.
        TPair last = get(lastIndex);
        ThdlDebug.verify(!"+".equals(last.getRight()));

        // A ':' among the vowels becomes a TGCPair of its own, so
        // take it out of the pair and remember to emit it at the end.
        // TODO(DLC)[EWTS->Tibetan]
        boolean appendU0F7F = false;
        String originalRight = last.getRight();
        int colonAt = (originalRight == null) ? -1 : originalRight.indexOf(':');
        if (colonAt >= 0) {
            appendU0F7F = true;
            StringBuffer withoutColon = new StringBuffer(originalRight);
            withoutColon.deleteCharAt(colonAt);
            last = new TPair(traits, last.getLeft(), withoutColon.toString());
        }

        boolean hasNonAVowel = (!traits.aVowel().equals(last.getRight())
                                && null != last.getRight());

        // The left side is either a consonant the traits can map to
        // EWTS or a digit that stands for itself.
        String lastWylie = traits.getEwtsForConsonant(last.getLeft());
        if (lastWylie == null && last.isNumeric()) {
            lastWylie = last.getLeft();
            sawNumber = true;
        }
        if (null == lastWylie)
            throw new Error("BADNESS AT MAXIMUM: p is " + last + " and thislWylie is " + lastWylie);
        stackWylie.append(lastWylie);

        // The Tibetan lookup wants the stack without its '+'
        // characters; the Sanskrit lookup wants it with them.
        String wholeStack = stackWylie.toString();
        StringBuffer plusless = new StringBuffer(wholeStack);
        for (int plusAt = plusless.indexOf("+");
             plusAt >= 0;
             plusAt = plusless.indexOf("+")) {
            plusless.deleteCharAt(plusAt);
        }
        boolean isTibetan = TibetanMachineWeb.isWylieTibetanConsonantOrConsonantStack(plusless.toString());
        boolean isSanskrit = TibetanMachineWeb.isWylieSanskritConsonantStack(wholeStack);
        if (ddebug && !isTibetan && !isSanskrit && !sawNumber) {
            System.out.println("OTHER for " + wholeStack + " with vowel " + traits.getEwtsForWowel(last.getRight()) + " and p.getRight()=" + last.getRight());
        }
        if (isTibetan && isSanskrit) {
            // RVA, e.g.  It must be Tibetan because RWA is what
            // you'd use for RA over fixed-form WA.
            isSanskrit = false;
        }
        if (ddebug && hasNonAVowel && traits.getEwtsForWowel(last.getRight()) == null) {
            System.out.println("vowel " + traits.getEwtsForWowel(last.getRight()) + " and p.getRight()=" + last.getRight());
        }

        // Numeric wins over Sanskrit, which wins over Tibetan;
        // anything else is OTHER.
        indexList.add(new Integer(index));
        TGCPair cluster
            = new TGCPair(wholeStack,
                          (hasNonAVowel
                           ? traits.getEwtsForWowel(last.getRight())
                           : ""),
                          (sawNumber
                           ? TGCPair.TYPE_OTHER
                           : (isSanskrit
                              ? TGCPair.TYPE_SANSKRIT
                              : (isTibetan
                                 ? TGCPair.TYPE_TIBETAN
                                 : TGCPair.TYPE_OTHER))));
        pl.add(cluster);

        if (appendU0F7F) {
            indexList.add(new Integer(index));
            pl.add(new TGCPair("H", null, TGCPair.TYPE_OTHER)); // TODO(DLC)[EWTS->Tibetan]
        }
    }

    /** Maps the Unicode that the usual rules produce for the
     *  consonants of certain stacks to the Unicode that should be
     *  emitted instead; consulted by {@link
     *  #getUnicode(StringBuffer)} for ACIP only.  Built once, during
     *  class initialization, and never modified afterwards, so that
     *  no thread can ever observe it half filled. */
    private static final HashMap unicodeExceptionsMap
        = createUnicodeExceptionsMap();

    /** Builds the table behind {@link #unicodeExceptionsMap}. */
    private static HashMap createUnicodeExceptionsMap() {
        HashMap m = new HashMap();
        m.put("\u0f69\u0fb2", "\u0f69\u0fbc"); // KshR (variety 1)
        m.put("\u0f40\u0fb5\u0fb2", "\u0f40\u0fb5\u0fbc"); // KshR (variety 2)
        m.put("\u0f4e\u0f9c\u0fb2\u0fb1", "\u0f4e\u0f9c\u0fbc\u0fb1"); // ndRY
        m.put("\u0f4e\u0f9c\u0fb1", "\u0f4e\u0f9c\u0fbb"); // ndY
        m.put("\u0f61\u0fb1", "\u0f61\u0fbb"); // YY
        m.put("\u0f62\u0fb1", "\u0f6a\u0fbb"); // RY
        m.put("\u0f62\u0fba", "\u0f6a\u0fba"); // RW
        m.put("\u0f62\u0fb4", "\u0f6a\u0fb4"); // RSHA
        m.put("\u0f62\u0fb4\u0fb1", "\u0f6a\u0fb4\u0fb1"); // RSHYA
        m.put("\u0f62\u0fb5", "\u0f6a\u0fb5"); // Rsh
        m.put("\u0f62\u0fb5\u0f9e", "\u0f6a\u0fb5\u0f9e"); // Rshn
        m.put("\u0f62\u0fb5\u0f9e\u0fb1", "\u0f6a\u0fb5\u0f9e\u0fb1"); // RshnY
        m.put("\u0f62\u0fb5\u0fa8", "\u0f6a\u0fb5\u0fa8"); // RshM
        m.put("\u0f62\u0fb5\u0fb1", "\u0f6a\u0fb5\u0fb1"); // RshY
        m.put("\u0f62\u0fb6", "\u0f6a\u0fb6"); // RS
        return m;
    }

    /** Appends legal Unicode corresponding to this stack to sb.
     *  An empty list appends nothing.
     *  FIXME: which normalization form, if any?
     *  @param sb the buffer to append to
     *  @throws Error if this list has more than one pair and a pair
     *  other than the last has something besides "+" on its right */
    void getUnicode(StringBuffer sb) {
        // The question is this: U+0FB1 or U+0FBB?  U+0FB2 or U+0FBC?
        // The answer: always the usual form, not the full form,
        // except for a few known stacks (all the ones with full-form,
        // non-WA subjoined consonants in TMW: [in EWTS, they are:]
        // r+Y, N+D+Y, N+D+R+y, k+Sh+R).  Note that wa-zur, U+0FAD, is
        // never confused for U+0FBA because "V" and "W" are different
        // transliterations.  EWTS {r+W} thus needs no special
        // treatment during ACIP->Unicode.

        StringBuffer nonVowelSB = new StringBuffer();
        boolean subscribed = false;
        int szz = size();
        int i;
        // When there is exactly one pair, this loop handles it;
        // otherwise it handles every pair but the last, which is
        // handled separately below.  Every pair after the first is
        // generated with subscribed set to true.
        for (i = 0; i + ((1 == szz) ? 0 : 1) < szz; i++) {
            TPair p = get(i);

            // FIXME: change this to an assertion:
            if ((1 != szz) && null != p.getRight() && !"+".equals(p.getRight()))
                throw new Error("Oops -- this stack (i.e., " + toString() + ") is funny, so we can't generate proper Unicode for it.  i is " + i + " and size is " + szz);

            p.getUnicode(nonVowelSB, subscribed);
            subscribed = true;
        }
        if (szz > 1) {
            // The last pair: its consonant joins the others in
            // nonVowelSB while its vowel is kept apart in vowelSB, so
            // that the consonants alone can be looked up in the
            // exceptions table.
            TPair p = get(i);
            StringBuffer vowelSB = new StringBuffer();
            p.getUnicode(nonVowelSB, vowelSB, subscribed /* which is true */);

            String mapEntry = (String)unicodeExceptionsMap.get(nonVowelSB.toString());
            if (traits.isACIP() && null != mapEntry)
                sb.append(mapEntry);
            else
                sb.append(nonVowelSB);
            sb.append(vowelSB);
        } else {
            sb.append(nonVowelSB);
        }
    }

    /** Appends the DuffCodes that correspond to this grapheme cluster
     *  to duffsAndErrors, or appends a String that is an error or
     *  warning message (a short one iff shortMessages is true) saying
     *  that TMW cannot represent this grapheme cluster.  The message
     *  is Error 137 if noCorrespondingTMWGlyphIsError is true;
     *  otherwise, it's Warning 511.
     *
     *  <p>The glyph is looked up by a hash key built from the EWTS of
     *  the pairs of this list.  If the key is unknown, it is tried
     *  again with every '+' replaced by '-' before giving up.
     *
     *  @param duffsAndErrors the list appended to
     *  @param shortMessages true if a short message is wanted
     *  @param noCorrespondingTMWGlyphIsError selects between Error
     *  137 and Warning 511
     *  @throws Error if a glyph was found and yet nothing was
     *  appended to duffsAndErrors */
    void getDuff(ArrayList duffsAndErrors,
                 boolean shortMessages,
                 boolean noCorrespondingTMWGlyphIsError) {
        int previousSize = duffsAndErrors.size();
        StringBuffer wylieForConsonant = new StringBuffer();
        for (int x = 0; x + 1 < size(); x++) {
            wylieForConsonant.append(get(x).getWylie(false));
        }
        TPair lastPair = get(size() - 1);
        wylieForConsonant.append(lastPair.getWylie(true));
        String hashKey = wylieForConsonant.toString();

        // Because EWTS has special handling for full-formed
        // subjoined consonants, we have special handling here.
        if ("r+y".equals(hashKey))
            hashKey = "r+Y";
        else if ("y+y".equals(hashKey))
            hashKey = "y+Y";
        else if ("N+D+y".equals(hashKey))
            hashKey = "N+D+Y";
        else if ("N+D+r+y".equals(hashKey))
            hashKey = "N+D+R+y";
        else if ("k+Sh+r".equals(hashKey))
            hashKey = "k+Sh+R";

        // TPair.getWylie(..) returns "W" sometimes when "w" is what
        // really should be returned.  ("V" always causes "w" to be
        // returned, which is fine.)  We'll change "W" to "w" here if
        // we need to.  We do it only for a few known stacks (the ones
        // in TMW).
        if ("W".equals(hashKey))
            hashKey = "w";
        else if ("W+y".equals(hashKey))
            hashKey = "w+y";
        else if ("W+r".equals(hashKey))
            hashKey = "w+r";
        else if ("W+n".equals(hashKey))
            hashKey = "w+n";
        else if ("W+W".equals(hashKey))
            hashKey = "w+W";

        // For these stacks the key starts with "R", not "r".
        if ("r+Y".equals(hashKey)
            || "r+W".equals(hashKey)
            || "r+sh".equals(hashKey)
            || "r+sh+y".equals(hashKey)
            || "r+Sh".equals(hashKey)
            || "r+Sh+N".equals(hashKey)
            || "r+Sh+N+y".equals(hashKey)
            || "r+Sh+m".equals(hashKey)
            || "r+Sh+y".equals(hashKey)
            || "r+s".equals(hashKey)
            ) {
            hashKey = "R" + hashKey.substring(1); // r+Y => R+Y, etc.
        }

        if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
            // Second chance: the same key spelled with '-' in place
            // of '+'.
            hashKey = hashKey.replace('+', '-');
            if (!TibetanMachineWeb.isKnownHashKey(hashKey)) {
                duffsAndErrors.add(ErrorsAndWarnings.getMessage(noCorrespondingTMWGlyphIsError
                                                                ? 137
                                                                : 511,
                                                                shortMessages,
                                                                recoverACIP(),
                                                                traits));
                return;
            }
        }
        // No vowel to render if the right side is absent or is just
        // the disambiguator.  It is the pair's right side, not the
        // pair itself, that must be compared with the disambiguator,
        // as is done everywhere else in this class.
        if (lastPair.getRight() == null
            || traits.disambiguator().equals(lastPair.getRight())) {
            duffsAndErrors.add(TibetanMachineWeb.getGlyph(hashKey));
        } else {
            traits.getDuffForWowel(duffsAndErrors,
                                   TibetanMachineWeb.getGlyph(hashKey),
                                   lastPair.getRight());
        }
        if (previousSize == duffsAndErrors.size())
            throw new Error("TPairList with no duffs? " + toString()); // FIXME: change to assertion.
    }
}