diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
index ce782a4..d791e19 100644
--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java
@@ -346,7 +346,7 @@ public class LegalTshegBar
         });
 
     /** Returns a two-codepoint string consisting of the Unicode
-     *  representation of what Extended Wylie calls
+     *  representation of what THDL Extended Wylie calls
      *  <code>'i</code>. */
     public static String getConnectiveCaseSuffix() {
         return connectiveCaseSuffix;
@@ -382,8 +382,8 @@ public class LegalTshegBar
 
 
     /** Returns an array of Unicode strings, all the legal suffix
-        particles.  In Extended Wylie, these are: <ul> <li>'i</li>
-        <li>'o</li> <li>'u</li> <li>'am</li> </ul>
+        particles.  In THDL Extended Wylie, these are: <ul>
+        <li>'i</li> <li>'o</li> <li>'u</li> <li>'am</li> </ul>
     
         <p>This is not very efficient.</p> */
     public static String[] getPossibleSuffixParticles() {
@@ -823,9 +823,9 @@ public class LegalTshegBar
       isTransliteratedSanskrit(), boolean isTransliteratedChinese()
       (design: contains fa or va, maybe?). */
 
-    /** Returns a StringBuffer that holds the extended wylie
+    /** Returns a StringBuffer that holds the THDL extended wylie
      *  representation of this syllable. */
-    public StringBuffer getExtendedWylie() {
+    public StringBuffer getThdlWylie() {
         StringBuffer sb = new StringBuffer();
         char rootLetter = getRootLetter();
         if (hasPrefix()) {
@@ -837,7 +837,7 @@ public class LegalTshegBar
 
             boolean disambiguatorNeeded = false;
             char prefix = getPrefix();
-            sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(prefix));
+            sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(prefix));
             if (!hasHeadLetter()) {
                 if (EWC_ya == rootLetter) {
                     if (isConsonantThatTakesYaBtags(prefix))
@@ -857,67 +857,67 @@ public class LegalTshegBar
                 sb.append(TibetanMachineWeb.WYLIE_DISAMBIGUATING_KEY);
         }
         if (hasHeadLetter())
-            sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getHeadLetter()));
-        sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(rootLetter));
+            sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getHeadLetter()));
+        sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(rootLetter));
         if (hasSubjoinedLetter())
-            sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getSubjoinedLetter()));
+            sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getSubjoinedLetter()));
         if (hasWaZurSubjoinedToRootLetter())
-            sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(EWSUB_wa_zur));
+            sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(EWSUB_wa_zur));
 
-        // a-chung is treated, in Extended Wylie, like a vowel.  I.e.,
-        // you don't have 'pAa', you have 'pA'.
+        // a-chung is treated, in THDL Extended Wylie, like a vowel.
+        // I.e., you don't have 'pAa', you have 'pA'.
         if (hasAChungOnRootLetter()) {
             if (hasExplicitVowel()) {
                 if (EWV_i == getVowel()) {
-                    sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar('\u0F73'));
+                    sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar('\u0F73'));
                 } else if (EWV_u == getVowel()) {
-                    sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar('\u0F75'));
+                    sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar('\u0F75'));
                 } else if (EWV_e == getVowel() || EWV_o == getVowel()) {
                     // The exception to the rule for a-chung and vowels...
 
                     // DLC FIXME: are these allowed in legal Tibetan?
                     // EWTS would have special cases for them if so,
                     // I'd wager...
-                    sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(EW_achung));
-                    sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel()));
+                    sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(EW_achung));
+                    sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getVowel()));
                 } else {
                     ThdlDebug.abort("only simple vowels occur in this class, how did this get past internalLegalityTest(..)?");
                 }
             } else {
-                sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(EW_achung));
+                sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(EW_achung));
             }
         } else {
             if (hasExplicitVowel())
-                sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel()));
+                sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getVowel()));
             else
                 sb.append("a");
         }
 
         if (hasSuffix()) {
             String suf = getSuffix();
-            sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(suf.charAt(0)));
+            sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(suf.charAt(0)));
             if (suf.length() > 1) {
                 // DLC assert, don't verify, that the length is two.
                 // This could change if I learn of more suffix
                 // particles.
                 ThdlDebug.verify(2 == suf.length());
-                sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(suf.charAt(1)));
+                sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(suf.charAt(1)));
             }
         }
         if (hasPostsuffix())
-            sb.append(UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getPostsuffix()));
+            sb.append(UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getPostsuffix()));
         return sb;
     }
 
 
-    // DLC: toXML for the dense XML
     /** Returns a <legalTibetanSyllable> element that contains only
-     *  the Extended Wylie transliteration for the whole syllable and a note that the . */
+     *  the THDL Extended Wylie transliteration for the whole syllable
+     *  and a note about the transliteration. */
     public String toConciseXML() {
         // DLC version-control the EWTS document. 0.5 is used below:
         return ("<legalTibetanSyllable "
                 + "transliterationType=\"THDL Extended Wylie 0.5\" "
-                + "transliteration=\"" + getExtendedWylie() + "\"" + "/>");
+                + "transliteration=\"" + getThdlWylie() + "\"" + "/>");
     }
 
     /** Returns a <legalTibetanSyllable> element that contains the
@@ -929,18 +929,18 @@ public class LegalTshegBar
                 + "transliterationType=\"THDL Extended Wylie 0.5\" "
                 + (hasPrefix()
                    ? ("prefix=\""
-                      + UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getPrefix()) + "\" ")
+                      + UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getPrefix()) + "\" ")
                    : "")
                 + (hasHeadLetter()
                    ? ("headLetter=\""
-                      + UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getHeadLetter())
+                      + UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getHeadLetter())
                       + "\" ")
                    : "")
                 + ("rootLetter=\""
-                   + UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getRootLetter()) + "\" ")
+                   + UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getRootLetter()) + "\" ")
                 + (hasSubjoinedLetter()
                    ? ("subjoinedLetter=\""
-                      + UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getSubjoinedLetter())
+                      + UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getSubjoinedLetter())
                       + "\" ")
                    : "")
                 + (hasWaZurSubjoinedToRootLetter()
@@ -953,17 +953,17 @@ public class LegalTshegBar
                 // DLC NOW: what about the root letter a, i.e. &#92;u0F68 ?  do we want the EWTS to be 'aa' ?
                 + ("vowel=\""
                    + (hasExplicitVowel()
-                      ? UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel())
+                      ? UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getVowel())
                       : "a")
                    + "\" ")
                 + (hasSuffix()
                    ? ("suffix=\""
-                      + UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeString(getSuffix())
+                      + UnicodeCharToThdlWylie.getThdlWylieForUnicodeString(getSuffix())
                       + "\" ")
                    : "")
                 + (hasPostsuffix()
                    ? ("postsuffix=\""
-                      + UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getPostsuffix())
+                      + UnicodeCharToThdlWylie.getThdlWylieForUnicodeChar(getPostsuffix())
                       + "\" ")
                    : "")
                 + "/>");
diff --git a/source/org/thdl/tib/text/tshegbar/TshegBar.java b/source/org/thdl/tib/text/tshegbar/TshegBar.java
index 4eefed6..764144d 100644
--- a/source/org/thdl/tib/text/tshegbar/TshegBar.java
+++ b/source/org/thdl/tib/text/tshegbar/TshegBar.java
@@ -58,7 +58,7 @@ package org.thdl.tib.text.tshegbar;
  *  <p> This class allows for invalid tsheg bars, like those
  *  containing more than one prefix, more than two suffixes, an
  *  invalid postsuffix (secondary suffix), more than one consonant
- *  stack (excluding the special case of what we call in Extended
+ *  stack (excluding the special case of what we call in THDL Extended
  *  Wylie "'i", which is technically a consonant stack but is used in
  *  Tibetan like a suffix).</p>.
  *
diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeConstants.java b/source/org/thdl/tib/text/tshegbar/UnicodeConstants.java
index 611abcd..8496989 100644
--- a/source/org/thdl/tib/text/tshegbar/UnicodeConstants.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeConstants.java
@@ -40,6 +40,12 @@ public interface UnicodeConstants {
     static final byte NORM_NFD = 3;
     /** Refers to Normalization Form KD: */
     static final byte NORM_NFKD = 4;
+    /** Refers to Normalization Form THDL, which is NFD except for
+        <code>U+0F77</code> and <code>U+0F79</code>, which are
+        normalized according to NFKD.  This is the One True
+        Normalization Form, as it leaves no precomposed codepoints and
+        does not normalize <code>U+0F0C</code>. */
+    static final byte NORM_NFTHDL = 5;
 
 
     /** for those times when you need a char to represent a
diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
index f527438..150d57f 100644
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@@ -94,35 +94,34 @@ public class UnicodeUtils implements UnicodeConstants {
     }
 
     /** Puts the Tibetan codepoints in tibetanUnicode, a sequence of
-        Unicode codepoints, into Normalization Form KD (NFKD) as
-        specified by Unicode 3.2.  The Tibetan passages of the
-        returned string are in NFKD, but codepoints outside of the
-        range <code>U+0F00</code>-<code>U+0FFF</code> are not
-        necessarily put into NFKD.  This form uses a maximum of
+        Unicode codepoints, into either Normalization Form KD (NFKD),
+        D (NFD), or THDL (NFTHDL), depending on the value of normForm.
+        NFD and NFKD are specified by Unicode 3.2; NFTHDL is needed
+        for {@link org.thdl.tib.text.tshegbar.GraphemeCluster} because
+        NFKD normalizes <code>U+0F0C</code>.  NFTHDL uses a maximum of
         codepoints, and it never uses codepoints whose use has been
-        {@link #isDiscouraged(char) discouraged}.  It would be David
-        Chandler's very favorite form if not for the fact that
-        <code>U+0F0C</code> normalizes to <code>U+0F0B</code> in NFKD.
-        NFD is thus David Chandler's favorite, though it does not
-        decompose <code>U+0F77</code> and <code>U+0F79</code> (for
-        some reason, hopefully a well-thought-out one).
+        {@link #isDiscouraged(char) discouraged}.
 
-        <p>Recall that NFKD, as it applies to Tibetan codepoints, is
-        closed under string concatenation and under substringing.
-        Note again that if the input contains codepoints for which
-        {@link #isInTibetanRange(char)} is not true, then they will
-        not be modified.</p>
+        <p>The Tibetan passages of the returned string are in the
+        chosen normalized form, but codepoints outside of the {@link
+        #isInTibetanRange(char) range}
+        <code>U+0F00</code>-<code>U+0FFF</code> are not necessarily
+        put into normalized form.</p>
+
+        <p>Recall that normalized forms are not necessarily closed
+        under string concatenation, but are closed under
+        substringing.</p>
     
         <p>Note well that only well-formed input guarantees
         well-formed output.</p>
 
         @param tibetanUnicode the codepoints to be decomposed
-        @param normForm NORM_NFKD or NORM_NFD */
+        @param normForm NORM_NFKD, NORM_NFTHDL, or NORM_NFD */
     public static void toMostlyDecomposedUnicode(StringBuffer tibetanUnicode,
                                                  byte normForm)
     {
-        if (normForm != NORM_NFD && normForm != NORM_NFKD)
-            throw new IllegalArgumentException("normForm must be NORM_NFD or NORM_NFKD for decomposition to work");
+        if (normForm != NORM_NFD && normForm != NORM_NFKD && normForm != NORM_NFTHDL)
+            throw new IllegalArgumentException("normForm must be NORM_NFD, NORM_NFTHDL, or NORM_NFKD for decomposition to work");
         int offset = 0;
         while (offset < tibetanUnicode.length()) {
             String s
@@ -157,15 +156,19 @@ public class UnicodeUtils implements UnicodeConstants {
         and returns null for codepoints that are already normalized or
         are not in the Tibetan range of Unicode.
         @param tibetanUnicodeCP the codepoint to normalize
-        @param normalizationForm NORM_NFKD or NORM_NFD if you expect
-        something nontrivial to happen
+        @param normalizationForm NORM_NFTHDL, NORM_NFKD, or NORM_NFD
+        if you expect something nontrivial to happen
         @return null if tibetanUnicodeCP is already in the chosen
         normalized form, or a string of two or three codepoints
         otherwise */
-    public static String toNormalizedForm(char tibetanUnicodeCP, byte normalizationForm) {
+    public static String toNormalizedForm(char tibetanUnicodeCP,
+                                          byte normalizationForm)
+    {
         if (normalizationForm == NORM_NFKD
-            || normalizationForm == NORM_NFD) {
-            // Where not specified, the NFKD form is also the NFD form.
+            || normalizationForm == NORM_NFD
+            || normalizationForm == NORM_NFTHDL) {
+            // Where not specified, the NFKD and NFTHDL forms are
+            // identical to the NFD form.
             switch (tibetanUnicodeCP) {
             case '\u0F0C': return ((normalizationForm == NORM_NFKD)
                                    ? "\u0F0B" : null);
@@ -178,14 +181,25 @@ public class UnicodeUtils implements UnicodeConstants {
             case '\u0F73': return "\u0F71\u0F72";
             case '\u0F75': return "\u0F71\u0F74";
             case '\u0F76': return "\u0FB2\u0F80";
-            // I do not understand why NFD does not decompose this codepoint:
-            case '\u0F77': return ((normalizationForm == NORM_NFKD)
-                                   ? "\u0FB2\u0F71\u0F80" : null);
+            case '\u0F77': {
+                // I do not understand why NFD does not decompose this
+                // codepoint, hence NORM_NFTHDL does:
+                if (normalizationForm == NORM_NFKD
+                    || normalizationForm == NORM_NFTHDL)
+                    return "\u0FB2\u0F71\u0F80";
+                else
+                    return null;
+            }
             case '\u0F78': return "\u0FB3\u0F80";
-            // I do not understand why NFD does not decompose this codepoint:
-            case '\u0F79': return ((normalizationForm == NORM_NFKD)
-                                   ? "\u0FB3\u0F71\u0F80" : null);
-
+            case '\u0F79': {
+                // I do not understand why NFD does not decompose this
+                // codepoint, hence NORM_NFTHDL does:
+                if (normalizationForm == NORM_NFKD
+                    || normalizationForm == NORM_NFTHDL)
+                    return "\u0FB3\u0F71\u0F80";
+                else
+                    return null;
+            }
             case '\u0F81': return "\u0F71\u0F80";
             case '\u0F93': return "\u0F92\u0FB7";
             case '\u0F9D': return "\u0F9C\u0FB7";