More and better tests; fixed some bugs in LegalTshegBar.

2003-03-28 03:49:49 +00:00 · 2003-03-28 03:49:49 +00:00 · 2b81020b0e
commit 2b81020b0e
parent 35a9869aac
4 changed files with 317 additions and 68 deletions
--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
@ -100,13 +100,11 @@ And also there are cases where they combine. For ex you can have
 *  </ul>
 *
 *  <p>Note that this class uses only a subset of Unicode to represent
- *  consonants and vowels.  In some situations, you should use {@link
- *  #EWSUB_wa_zur} to represent the consonant wa, while in others you
- *  should use {@link #EWC_wa}, even though you mean to subscribe a
- *  fixed-form wa.  Basically, stick to the codepoints for which
- *  enumerations exist in {@link
- *  org.thdl.tib.text.tshegbar.UnicodeConstants} and use your common
- *  sense.</p>
+ *  consonants and vowels.  You should always use the nominal form of
+ *  a letter, e.g. {@link #EWC_wa}, not {@link #EWSUB_wa_zur}, to
+ *  represent letters.  (What if you mean to subscribe a fixed-form
+ *  wa?  Well, that's not a legal tsheg-bar, so you don't mean to do
+ *  that.)</p>
 *
 *  <p>For a pretty good, concise summary of the rules this class
 *  knows about, see Joe B. Wilson's <i>Translating Buddhism from
@ -142,8 +140,6 @@ public class LegalTshegBar
    /** Do not use this constructor. */
    private LegalTshegBar() { super(); }

-    // DLC FIXME: do we want to accept EWC_ra or EWSUB_ra_btags for
-    // the root letter, even if there is no head letter?  Etc.
    /** Constructs a valid Tibetan syllable or throws an exception.
     *  Use EW_ABSENT (or null in the case of <code>suffix</code>) for
     *  those parts of the syllable that are absent.  The root letter
@ -180,7 +176,7 @@ public class LegalTshegBar
        // copying is slightly inefficient because it is unnecessary
        // since Java strings are read-only, but translating this code
        // to C++ is easier this way.
-        this.suffix = new String(suffix);
+        this.suffix = (suffix == null) ? null : new String(suffix);

        this.postsuffix = postsuffix;
        this.vowel = vowel;
@ -198,7 +194,8 @@ public class LegalTshegBar
        throws IllegalArgumentException
    {
        this(prefix, headLetter, rootLetter, subjoinedLetter,
-             hasWaZur, hasAChung, new String(new char[] { suffix }),
+             hasWaZur, hasAChung,
+             (suffix == EW_ABSENT) ? null : new String(new char[] { suffix }),
             postsuffix, vowel);
    }

@ -216,7 +213,10 @@ public class LegalTshegBar
    }

    /** Returns the non-EWSUB_wa_zur consonant subscribed to the root
-     *  consonant, or EW_ABSENT if none is.  If you want to know if there is a wa-zur, use {@link #hasWaZurSubjoinedToRootLetter()}*/
+     *  consonant, or EW_ABSENT if none is.  If you want to know if
+     *  there is a wa-zur, use {@link
+     *  #hasWaZurSubjoinedToRootLetter()}.  This returns EWC_ra, not
+     *  EWSUB_ra_btags, etc.  */
    public char getSubjoinedLetter() {
        return subjoinedLetter;
    }
@ -458,11 +458,11 @@ public class LegalTshegBar
        if (EW_ABSENT == subjoinedLetter) {
            return isConsonantThatTakesWaZur(rootLetter);
        }
-        if (EWSUB_ra_btags == subjoinedLetter) {
+        if (EWC_ra == subjoinedLetter) {
            if (EWC_ga == rootLetter
                    || EWC_da == rootLetter)
                return true;
-        } else if (EWSUB_ya_btags == subjoinedLetter) {
+        } else if (EWC_ya == subjoinedLetter) {
            if (EWC_pha == rootLetter)
                return true;
        }
@ -599,6 +599,9 @@ public class LegalTshegBar
     *  this is {@link #getConnectiveCaseSuffix()}
     *  @param postsuffix the optional postsuffix, which should be
     *  EWC_sa or EWC_da
+     *  @param errorBuffer if non-null, and if the return value is
+     *  false, then the reason that this is not a legal tsheg-bar will
+     *  be appended to errorBuffer.
     *  @param vowel the optional vowel */
    public static boolean formsLegalTshegBar(char prefix,
                                             char headLetter,
@ -608,12 +611,14 @@ public class LegalTshegBar
                                             boolean hasAChung,
                                             String suffix,
                                             char postsuffix,
-                                             char vowel)
+                                             char vowel,
+                                             StringBuffer errorBuffer)
    {
        try {
            return internalLegalityTest(prefix, headLetter, rootLetter,
                                        subjoinedLetter, hasWaZur, hasAChung,
-                                        suffix, postsuffix, vowel, false);
+                                        suffix, postsuffix, vowel, false,
+                                        errorBuffer);
        } catch (IllegalArgumentException e) {
            throw new Error("This simply cannot happen, but it did.");
        }
@ -631,12 +636,15 @@ public class LegalTshegBar
                                             boolean hasAChung,
                                             char suffix,
                                             char postsuffix,
-                                             char vowel)
+                                             char vowel,
+                                             StringBuffer errorBuffer)
    {
        return formsLegalTshegBar(prefix, headLetter, rootLetter,
                                  subjoinedLetter, hasWaZur, hasAChung,
-                                  new String(new char[] { suffix }),
-                                  postsuffix, vowel);
+                                  ((suffix == EW_ABSENT)
+                                   ? null
+                                   : new String(new char[] { suffix })),
+                                  postsuffix, vowel, errorBuffer);
    }


@ -659,12 +667,17 @@ public class LegalTshegBar
    {
        internalLegalityTest(prefix, headLetter, rootLetter,
                             subjoinedLetter, hasWaZur, hasAChung,
-                             suffix, postsuffix, vowel, true);
+                             suffix, postsuffix, vowel, true, null);
    }

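    /** Reports an illegal tsheg-bar: appends msg to the error buffer
     *  when one is given, then throws an IllegalArgumentException
     *  carrying msg if doThrow is true, and otherwise returns false
     *  so that callers can simply <code>return</code> the result. */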
-    private static boolean internalThrowThing(boolean doThrow, String msg)
+    private static boolean internalThrowThing(boolean doThrow,
+                                              StringBuffer errorBuf,
+                                              String msg)
    {
+        if (errorBuf != null) {
+            errorBuf.append(msg);
+        }
        if (doThrow)
            throw new IllegalArgumentException(msg);
        return false;
@ -674,6 +687,8 @@ public class LegalTshegBar
     *  thrown, then this combination makes a legal Tibetan syllable.
     *  To learn about the arguments, see {@link
     *  #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}.
+     *  @param errorBuf if non-null and this syllable is illegal, the
+     *  reason it is illegal will be appended here
     *  @return true if this syllable is legal, false if this syllable
     *  is illegal and throwIfIllegal is false, does not return if
     *  this syllable is illegal and throwIfIllegal is true
@ -689,11 +704,13 @@ public class LegalTshegBar
                                                String suffix,
                                                char postsuffix,
                                                char vowel,
-                                                boolean throwIfIllegal)
+                                                boolean throwIfIllegal,
+                                                StringBuffer errorBuf)
        throws IllegalArgumentException
    {
        if (!isNominalRepresentationOfConsonant(rootLetter))
            return internalThrowThing(throwIfIllegal,
+                                      errorBuf,
                                      "The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (&#92;u0F6A)");

        if (EW_ABSENT != prefix) {
@ -701,28 +718,34 @@ public class LegalTshegBar
            // and that it can go with this root letter:
            if (!isNominalRepresentationOfPrefix(prefix))
                return internalThrowThing(throwIfIllegal,
+                                          errorBuf,
                                          "The prefix is not absent, so it must be one of the five possible prefixes.");
            // DLC test that it can go with the root letter.
        }

        if (EW_ABSENT != subjoinedLetter) {
-            if (EWSUB_ya_btags == subjoinedLetter) {
+            if (EWC_ya == subjoinedLetter) {
                if (!isConsonantThatTakesYaBtags(rootLetter)) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "Cannot subscribe ya-btags to that root letter.");
                }
-            } else if (EWSUB_ra_btags == subjoinedLetter) {
+            } else if (EWC_ra == subjoinedLetter) {
                if (!isConsonantThatTakesRaBtags(rootLetter)) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "Cannot subscribe ra-btags to that root letter.");
                }
-            } else if (EWSUB_la_btags == subjoinedLetter) {
+            } else if (EWC_la == subjoinedLetter) {
                if (!isConsonantThatTakesLaBtags(rootLetter)) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "Cannot subscribe la-btags to that root letter.");
                }
-            } else if (EWSUB_wa_zur == subjoinedLetter) {
-                throw new Error("DLC FIXME: can this happen?  wa-zur comes in via the boolean argument hasWaZur, not via subjoinedLetter.");
+            } else if (EWC_wa == subjoinedLetter) {
+                return internalThrowThing(throwIfIllegal,
+                                          errorBuf,
+                                          "The presence of wa-zur must be specified via a boolean parameter.");
            } else {
                // check for a common mistake:
                if ('\u0FBA' == subjoinedLetter
@ -730,9 +753,11 @@ public class LegalTshegBar
                    || '\u0FBC' == subjoinedLetter)
                    {
                        return internalThrowThing(throwIfIllegal,
+                                                  errorBuf,
                                                  "The subjoined letter given is subjoinable, but you gave the fixed-form variant, which is not used in Tibetan syllables but is sometimes used in Tibetan transliteration of Sanskrit, Chinese, or some non-Tibetan language.");
                    }
                return internalThrowThing(throwIfIllegal,
+                                          errorBuf,
                                          "The subjoined letter given is not one of the four consonants that may be subscribed.");
            }
        } // subjoinedLetter tests
@ -743,10 +768,12 @@ public class LegalTshegBar
            if (!getConnectiveCaseSuffix().equals(suffix)) {
                if (suffix.length() != 1) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "Illegal suffix -- not one of the legal complex suffixes like 'u, 'o, 'i, 'am.");
                }
                if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "Illegal suffix -- not one of the ten legal suffixes: "
                                              + UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
                }
@ -755,6 +782,7 @@ public class LegalTshegBar
        if (EW_ABSENT != postsuffix) {
            if (null == suffix)
                return internalThrowThing(throwIfIllegal,
+                                          errorBuf,
                                          "You cannot have a postsuffix unless you also have a suffix.");
        }

@ -762,11 +790,13 @@ public class LegalTshegBar
            if (EWC_ra == headLetter) {
                if (!isConsonantThatTakesRaMgo(rootLetter)) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "The head letter ra cannot be used with that root letter.");
                }
            } else if (EWC_la == headLetter) {
                if (!isConsonantThatTakesLaMgo(rootLetter)) {
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "The head letter la cannot be used with that root letter.");
                }
            } else if (EWC_sa == headLetter) {
@ -774,15 +804,18 @@ public class LegalTshegBar
                    // handle a common error specially:
                    if (EWC_la == rootLetter)
                        return internalThrowThing(throwIfIllegal,
+                                                  errorBuf,
                                                  "sa cannot be a head letter atop the root letter la.  You probably meant to have sa the root letter and la the subjoined letter.");

                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "The head letter sa cannot be used with that root letter.");
                }
            } else {
                // '&#92;u0F6A' is not a valid head letter, even for
                // "rnya".  Use EWC_ra instead.
                return internalThrowThing(throwIfIllegal,
+                                          errorBuf,
                                          "The head letter given is not valid.");
            }
        } // headLetter tests
@ -796,16 +829,20 @@ public class LegalTshegBar
                {
                    if (EWC_achen == vowel)
                        return internalThrowThing(throwIfIllegal,
+                                                  errorBuf,
                                                  "The vowel given is not valid.  Use EW_ABSENT for the EWC_achen sound.");
                    if ('\u0F71' == vowel)
                        return internalThrowThing(throwIfIllegal,
-                                                  "a-chung cannot be used in a simple Tibetan syllable.");
+                                                  errorBuf,
+                                                  "a-chung cannot be used in a simple Tibetan syllable."); // DLC FIXME: what about pA?
                    return internalThrowThing(throwIfIllegal,
+                                              errorBuf,
                                              "The vowel given is not valid.");
                }
        }

        // Phew.  We got here, so this combination of inputs is valid.
+        // Do nothing to errorBuf.
        return true;
    }

--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBarTest.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBarTest.java
@ -40,29 +40,123 @@ public class LegalTshegBarTest extends TestCase implements UnicodeConstants {

    /** Tests the getThdlWylie() method and one of the constructors. */
    public void testGetThdlWylie() {
-        assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWSUB_ra_btags,
+        assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWC_ra,
                                     false, true, EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrAols"));
        assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
-                                     EWSUB_ra_btags, true, true,
+                                     EWC_ra, true, true,
                                     EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrwAols"));
        assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
-                                     EWSUB_ra_btags, false, false,
+                                     EWC_ra, false, false,
                                     EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrols"));
+        assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_ta,
+                                     EW_ABSENT, false, false,
+                                     EWC_nga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("btang"));
+
+        // dga and dag are fun, as both are represented by "\u0F51\u0F42":
+        {
+            assertTrue(new LegalTshegBar(EWC_da, EW_ABSENT, EWC_ga,
+                                         EW_ABSENT, false, false,
+                                         EW_ABSENT, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("dga"));
+            assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_da,
+                                         EW_ABSENT, false, false,
+                                         EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("dag"));
+        }
+
+        assertTrue(new LegalTshegBar(EW_ABSENT, EWC_ra, EWC_da,
+                                     EW_ABSENT, false, false,
+                                     EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("rdag"));
+        assertTrue(new LegalTshegBar(EWC_ba, EWC_ra, EWC_da,
+                                     EW_ABSENT, false, false,
+                                     EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("brdag"));
+
+        assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_nga,
+                                     EW_ABSENT, false, false,
+                                     "\u0F60\u0F72", EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("nga'i"));
+
+        assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_nga,
+                                     EW_ABSENT, false, false,
+                                     null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("nga"));
+
+        assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_sa,
+                                     EWC_la, false, false,
+                                     null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("sla"));
+
+        {
+            boolean threw = false;
+            try {
+                new LegalTshegBar(EW_ABSENT, EWC_sa, EWC_la,
+                                  EW_ABSENT, false, false,
+                                  null, EW_ABSENT, EW_ABSENT);
+            } catch (IllegalArgumentException e) {
+                threw = true;
+            }
+            assertTrue(threw);
+        }
    }

    /** Tests the formsLegalTshegBar(..) method. DLC FIXME: but
     * doesn't test it very well. */
    public void testFormsLegalTshegBar() {
+        StringBuffer eb = new StringBuffer();
+
        // Ensure that EWTS's jskad is not legal:
-        assertTrue(!LegalTshegBar.formsLegalTshegBar(EWC_ja, EWC_sa,
-                                                     EWC_ka, EW_ABSENT,
-                                                     false, false,
-                                                     EW_ABSENT, EWC_da,
-                                                     EW_ABSENT));
+        {
+            assertTrue(!LegalTshegBar.formsLegalTshegBar(EWC_ja, EWC_sa,
+                                                         EWC_ka, EW_ABSENT,
+                                                         false, false,
+                                                         EW_ABSENT, EWC_da,
+                                                         EW_ABSENT, eb));
+        }
+
        assertTrue(LegalTshegBar.formsLegalTshegBar(EWC_ba, EW_ABSENT,
                                                    EWC_ta, EW_ABSENT,
                                                    false, false,
                                                    EWC_da, EW_ABSENT,
-                                                    EW_ABSENT));
+                                                    EW_ABSENT, eb));
+        
+        // test that there's only one way to make dwa:
+        assertTrue(!LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
+                                                     EWC_da, EWSUB_wa_zur,
+                                                     false, false,
+                                                     EW_ABSENT, EW_ABSENT,
+                                                     EW_ABSENT, eb));
+        assertTrue(!LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
+                                                     EWC_da, EWC_wa,
+                                                     false, false,
+                                                     EW_ABSENT, EW_ABSENT,
+                                                     EW_ABSENT, eb));
+        boolean result
+            = LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
+                                               EWC_da, EW_ABSENT,
+                                               true, false,
+                                               EW_ABSENT, EW_ABSENT,
+                                               EW_ABSENT, eb);
+        assertTrue(eb.toString(), result);
+    }
+
+    /** Tests the behavior of the constructors. */
+    public void testConstructors() {
+        boolean x;
+        
+        x = false;
+        try {
+            new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
+                              EWSUB_ra_btags, false, false,
+                              EWC_la, EWC_sa, EWV_o);
+        } catch (IllegalArgumentException e) {
+            x = true;
+        }
+        assertTrue(x);
+
+        x = false;
+        try {
+            new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
+                              EWSUB_ra_btags, false, false,
+                              new String(new char[] { EWC_la }), EWC_sa,
+                              EWV_o);
+        } catch (IllegalArgumentException e) {
+            x = true;
+        }
+        assertTrue(x);
    }
 }
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@ -30,11 +30,11 @@ public class UnicodeUtils implements UnicodeConstants {
    /** Returns true iff x is a Unicode codepoint that represents a
        consonant or two-consonant stack that has a Unicode code
        point.  Returns true only for the usual suspects (like
-        <code>&#92;u0F40</code>) and for Sanskrit consonants (like
-        <code>&#92;u0F71</code>) and the simple two-consonant stacks in
-        Unicode (like <code>&#92;u0F43</code>).  Returns false for, among
+        <code>U+0F40</code>) and for Sanskrit consonants (like
+        <code>U+0F71</code>) and the simple two-consonant stacks in
+        Unicode (like <code>U+0F43</code>).  Returns false for, among
        other things, subjoined consonants like
-        <code>&#92;u0F90</code>. */
+        <code>U+0F90</code>. */
    public static boolean isNonSubjoinedConsonant(char x) {
        return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
                && (x >= '\u0F40' && x <= '\u0F6A'));
@ -43,11 +43,11 @@ public class UnicodeUtils implements UnicodeConstants {
    /** Returns true iff x is a Unicode codepoint that represents a
        subjoined consonant or subjoined two-consonant stack that has
        a Unicode code point.  Returns true only for the usual
-        suspects (like <code>&#92;u0F90</code>) and for Sanskrit
-        consonants (like <code>&#92;u0F9C</code>) and the simple
-        two-consonant stacks in Unicode (like <code>&#92;u0FAC</code>).
+        suspects (like <code>U+0F90</code>) and for Sanskrit
+        consonants (like <code>U+0F9C</code>) and the simple
+        two-consonant stacks in Unicode (like <code>U+0FAC</code>).
        Returns false for, among other things, non-subjoined
-        consonants like <code>&#92;u0F40</code>. */
+        consonants like <code>U+0F40</code>. */
    public static boolean isSubjoinedConsonant(char x) {
        return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */)
                && (x >= '\u0F90' && x <= '\u0FBC'));
@ -56,13 +56,13 @@ public class UnicodeUtils implements UnicodeConstants {
    /** Returns true iff x is the preferred representation of a
        Tibetan or Sanskrit consonant and cannot be broken down any
        further.  Returns false for, among other things, subjoined
-        consonants like <code>&#92;u0F90</code>, two-component consonants
-        like <code>&#92;u0F43</code>, and fixed-form consonants like
-        '&#92;u0F6A'.  The new consonants (for transcribing Chinese, I
-        believe) "&#92;u0F55&#92;u0F39" (which EWTS calls "fa"),
-        "&#92;u0F56&#92;u0F39" ("va"), and "&#92;u0F5F&#92;u0F39" ("Dza") are
-        two-codepoint sequences, but you should be aware of them
-        also. */
+        consonants like <code>U+0F90</code>, two-component consonants
+        like <code>U+0F43</code>, and fixed-form consonants like
+        <code>U+0F6A</code>.  The new consonants (for transcribing
+        Chinese, I believe) "&#92;u0F55&#92;u0F39" (which EWTS calls
+        "fa"), "&#92;u0F56&#92;u0F39" ("va"), and
+        "&#92;u0F5F&#92;u0F39" ("Dza") are two-codepoint sequences,
+        but you should be aware of them also. */
    public static boolean isPreferredFormOfConsonant(char x) {
        return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
                && (x >= '\u0F40' && x <= '\u0F68')
@ -97,7 +97,7 @@ public class UnicodeUtils implements UnicodeConstants {
        Unicode codepoints, into either Normalization Form KD (NFKD),
        D (NFD), or THDL (NFTHDL), depending on the value of normForm.
        NFD and NFKD are specified by Unicode 3.2; NFTHDL is needed
-        for {@link org.thdl.tib.text.tshegbar#UnicodeGraphemeCluster}
+        for {@link org.thdl.tib.text.tshegbar.UnicodeGraphemeCluster}
        because NFKD normalizes <code>U+0F0C</code> and neither NFD
        nor NFKD breaks down <code>U+0F00</code> into its constituent
        codepoints.  NFTHDL uses a maximum of codepoints, and it never
@ -247,7 +247,7 @@ public class UnicodeUtils implements UnicodeConstants {

    /** Returns true iff ch corresponds to the Tibetan letter wa.
        Several Unicode codepoints correspond to the Tibetan letter
-        wa.  Oftentimes, <code>&#92;u0F5D</code> is thought of as the
+        wa.  Oftentimes, <code>U+0F5D</code> is thought of as the
        nominal representation. */
    public static boolean isWa(char ch) {
        return ('\u0F5D' == ch
@ -257,7 +257,7 @@ public class UnicodeUtils implements UnicodeConstants {

    /** Returns true iff ch corresponds to the Tibetan letter ya.
        Several Unicode codepoints correspond to the Tibetan letter
-        ya.  Oftentimes, <code>&#92;u0F61</code> is thought of as the
+        ya.  Oftentimes, <code>U+0F61</code> is thought of as the
        nominal representation. */
    public static boolean isYa(char ch) {
        return ('\u0F61' == ch
@ -267,7 +267,7 @@ public class UnicodeUtils implements UnicodeConstants {

    /** Returns true iff there exists at least one codepoint cp in
        unicodeString such that cp {@link #isRa(char) is ra} or contains
-        ra (like <code>&#92;u0F77</code>).  This method is not implemented
+        ra (like <code>U+0F77</code>).  This method is not implemented
        as fast as it could be.  It calls on the canonicalization code
        in order to maximize reuse and minimize the possibility of
        coder error. */
@ -298,6 +298,9 @@ public class UnicodeUtils implements UnicodeConstants {
            return "\\u" + Integer.toHexString((int)cp);
    }

+    /**
+     * Returns a human-readable, ASCII form of the String s of Unicode
+     * codepoints. */
    public static String unicodeStringToString(String s) {
        StringBuffer sb = new StringBuffer(s.length() * 6);
        for (int i = 0; i < s.length(); i++) {
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
@ -40,10 +40,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
 	}

    /** Tests Unicode Normalization form KD for Tibetan codepoints.
-        See Unicode, Inc.'s NormalizationTest-3.2.0.txt.  This
-        contains all test cases for
-        <code>U+0F00</code>-<code>U+0FFF</code> there, and a few
-        more. */
+     *  See Unicode, Inc.'s NormalizationTest-3.2.0.txt.  This
+     *  contains all test cases for
+     *  <code>U+0F00</code>-<code>U+0FFF</code> there, and a few more.
+     *  Tests both {@link
+     *  UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
+     *  {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
+     *  byte)}.*/
    public void testMostlyNFKD() {
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFKD).equals("\u0F0B"));
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFKD).equals("\u0F40"));
@ -112,10 +115,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
    }

    /** Tests Unicode Normalization form D for Tibetan codepoints.
-        See Unicode, Inc.'s NormalizationTest-3.2.0.txt.  This
-        contains all test cases for
-        <code>U+0F00</code>-<code>U+0FFF</code> there, and a few
-        more. */
+     *  See Unicode, Inc.'s NormalizationTest-3.2.0.txt.  This
+     *  contains all test cases for
+     *  <code>U+0F00</code>-<code>U+0FFF</code> there, and a few more.
+     *  Tests both {@link
+     *  UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
+     *  {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
+     *  byte)}.*/
    public void testMostlyNFD() {
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFD).equals("\u0F0B"));
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFD).equals("\u0F40"));
@ -184,10 +190,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
    }

    /** Tests Unicode Normalization form THDL for Tibetan codepoints.
-        See Unicode, Inc.'s NormalizationTest-3.2.0.txt.  This
-        contains all test cases for
-        <code>U+0F00</code>-<code>U+0FFF</code> there, and a few
-        more. */
+     *  See Unicode, Inc.'s NormalizationTest-3.2.0.txt.  This
+     *  contains all test cases for
+     *  <code>U+0F00</code>-<code>U+0FFF</code> there, and a few more.
+     *  Tests both {@link
+     *  UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
+     *  {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
+     *  byte)}. */
    public void testMostlyNFTHDL() {
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFTHDL).equals("\u0F0B"));
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFTHDL).equals("\u0F40"));
@ -253,10 +262,36 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F79", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F81", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F71\u0F80", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
+
+
+        assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("", NORM_NFTHDL).equals(""));
+
+        {
+            StringBuffer sb = new StringBuffer("\u0FAC");
+            UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
+            assertTrue(sb.toString().equals("\u0FAB\u0FB7"));
+        }
+        {
+            StringBuffer sb = new StringBuffer("\u0F66");
+            UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
+            assertTrue(sb.toString().equals("\u0F66"));
+        }
+        {
+            StringBuffer sb = new StringBuffer("");
+            UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
+            assertTrue(sb.toString().equals(""));
+        }
    }

    /** Tests the containsRa method. */
    public void testContainsRa() {
+        assertTrue(!UnicodeUtils.containsRa('\u0F69'));
+        assertTrue(!UnicodeUtils.containsRa('\u0FB1'));
+        assertTrue(!UnicodeUtils.containsRa('\u0F48'));
+        assertTrue(!UnicodeUtils.containsRa('\u0060'));
+        assertTrue(!UnicodeUtils.containsRa('\uFFFF'));
+        assertTrue(!UnicodeUtils.containsRa('\uFFFF'));
+
        assertTrue(UnicodeUtils.containsRa('\u0FB2'));
        assertTrue(UnicodeUtils.containsRa('\u0F77'));
        assertTrue(UnicodeUtils.containsRa('\u0F76'));
@ -264,4 +299,84 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
        assertTrue(UnicodeUtils.containsRa('\u0F62'));
        assertTrue(UnicodeUtils.containsRa('\u0FBC'));
    }
+
+    /**
+     * Checks {@link UnicodeUtils#unicodeStringToString(String)}: each
+     * character of the input is expected to come back as a backslash,
+     * a lowercase u, and four lowercase, zero-padded hex digits. */
+    public void testUnicodeStringToString() {
+        // Each row is { input, expected escaped rendering }.
+        final String[][] expectations = {
+            { "\u0000", "\\u0000" },
+            { "\u0001", "\\u0001" },
+            { "\u000F", "\\u000f" },
+            { "\u001F", "\\u001f" },
+            { "\u00fF", "\\u00ff" },
+            { "\u01fF", "\\u01ff" },
+            { "\u0ffF", "\\u0fff" },
+            { "\u1ffF", "\\u1fff" },
+            { "\ufffF", "\\uffff" },
+            // A multi-character input: the escapes are simply concatenated.
+            { "\u0F00\u0091\uABCD\u0FFF\u0Ff1\uFFFF\u0000",
+              "\\u0f00\\u0091\\uabcd\\u0fff\\u0ff1\\uffff\\u0000" },
+        };
+
+        for (int i = 0; i < expectations.length; i++) {
+            final String escaped
+                = UnicodeUtils.unicodeStringToString(expectations[i][0]);
+            assertTrue(escaped.equals(expectations[i][1]));
+        }
+    }
+
+    /**
+     * Checks {@link UnicodeUtils#unicodeCodepointToString(char)}: a
+     * single character is expected to come back as a backslash, a
+     * lowercase u, and four lowercase, zero-padded hex digits. */
+    public void testUnicodeCodepointToString() {
+        // codepoints[i] is expected to be rendered as escapes[i].
+        final char[] codepoints = {
+            '\u0000', '\u0001', '\u000F', '\u001F', '\u00fF',
+            '\u01fF', '\u0ffF', '\u1ffF', '\ufffF',
+        };
+        final String[] escapes = {
+            "\\u0000", "\\u0001", "\\u000f", "\\u001f", "\\u00ff",
+            "\\u01ff", "\\u0fff", "\\u1fff", "\\uffff",
+        };
+
+        for (int i = 0; i < codepoints.length; i++) {
+            final String escaped
+                = UnicodeUtils.unicodeCodepointToString(codepoints[i]);
+            assertTrue(escaped.equals(escapes[i]));
+        }
+    }
+
+    /**
+     * Checks {@link UnicodeUtils#isEntirelyTibetanUnicode(String)}
+     * against one string drawn wholly from U+0F00-U+0FFF and one that
+     * additionally contains U+1000. */
+    public void testIsEntirelyTibetanUnicode() {
+        // U+0F48 is reserved, but in the range, so it must not cause
+        // a rejection.
+        final String allInRange = "\u0F00\u0FFF\u0F00\u0F1e\u0F48";
+        // Same codepoints plus U+1000, the first codepoint past
+        // U+0FFF; that one character is what makes this string fail.
+        final String withOutlier = "\u0F00\u1000\u0FFF\u0F00\u0F1e\u0F48";
+
+        assertTrue(UnicodeUtils.isEntirelyTibetanUnicode(allInRange));
+        assertTrue(!UnicodeUtils.isEntirelyTibetanUnicode(withOutlier));
+    }
+
+    /**
+     * Checks {@link UnicodeUtils#isTibetanConsonant(char)} against
+     * codepoints that must be rejected, codepoints that must be
+     * accepted, and reserved codepoints that must be rejected. */
+    public void testIsTibetanConsonant() {
+        // Not consonants: U+0000, U+F000, the codepoints on either
+        // side of U+0F00-U+0FFF, and the first and last codepoints
+        // of that range.
+        final char[] rejected = {
+            '\u0000', '\uF000', '\u0EFF', '\u1000', '\u0F00', '\u0FFF',
+        };
+        for (int i = 0; i < rejected.length; i++) {
+            assertTrue(!UnicodeUtils.isTibetanConsonant(rejected[i]));
+        }
+
+        // Consonants, from both the U+0F40.. and the U+0F90.. runs.
+        final char[] accepted = {
+            '\u0FB2', '\u0F6A', '\u0F40', '\u0F50', '\u0FBC', '\u0FB9',
+            '\u0FB0', '\u0FAD', '\u0FA6', '\u0F90', '\u0F91',
+        };
+        for (int i = 0; i < accepted.length; i++) {
+            assertTrue(UnicodeUtils.isTibetanConsonant(accepted[i]));
+        }
+
+        // reserved codepoints:
+        final char[] reserved = { '\u0F48', '\u0F98' };
+        for (int i = 0; i < reserved.length; i++) {
+            assertTrue(!UnicodeUtils.isTibetanConsonant(reserved[i]));
+        }
+    }
+
+    /**
+     * Checks {@link UnicodeUtils#isInTibetanRange(char)} with
+     * codepoints expected to fall outside the range and codepoints
+     * expected to fall inside it, including U+0F00 and U+0FFF. */
+    public void testIsInTibetanRange() {
+        // Expected to be out of range; U+1000 immediately follows U+0FFF.
+        final char[] outside = { '\u0000', '\u0100', '\u1000' };
+        for (int i = 0; i < outside.length; i++) {
+            assertTrue(!UnicodeUtils.isInTibetanRange(outside[i]));
+        }
+
+        // Expected to be in range.
+        final char[] inside = { '\u0F00', '\u0FF0', '\u0FFF' };
+        for (int i = 0; i < inside.length; i++) {
+            assertTrue(UnicodeUtils.isInTibetanRange(inside[i]));
+        }
+    }
 }