diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java index 5ba7bd7..aabc790 100644 --- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java +++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java @@ -35,7 +35,7 @@ import org.thdl.util.ThdlDebug; *
  • It contains no vocalic modifications
  • * *
  • It may or may not contain an a-chung - * (\u0F71)
  • + * (\u0F71) * *
  • It contains at most one vowel from the set {EWV_a, EWV_i, * EWV_e, EWV_u}, and that vowel is on the root stack. The one @@ -44,7 +44,7 @@ import org.thdl.util.ThdlDebug; * *
  • It has at most one suffix, which is a single consonant or the * special connective case marker 'i (i.e., - * "\u0F60\u0F72").
  • + * "\u0F60\u0F72"). * * DLC FIXME: we must allow many suffixes. See Andres' e-mail below: @@ -70,7 +70,7 @@ And also there are cases where they combine. For ex you can have * *
  • It may contain a EWC_sa or EWC_da postsuffix iff there exists * a suffix (and a suffix that is not the special connective case - * marker 'i (i.e., "\u0F60\u0F72") (DLC FIXME: 'o and + * marker 'i (i.e., "\u0F60\u0F72") (DLC FIXME: 'o and * 'am maybe? I asked in the "Embarrasing error in wylie conversion" * bug report.).
  • * @@ -236,7 +236,7 @@ public class LegalTshegBar } /** Returns null if there is no suffix, or a string containing the - * one consonant or a string "\u0F60\u0F72" + * one consonant or a string "\u0F60\u0F72" * containing two characters in the special case that the suffix * is that connective case marker {@link * #getConnectiveCaseSuffix()}. */ @@ -335,8 +335,8 @@ public class LegalTshegBar // DLC unit test that each EWC is a nominal form of a consonant - // you could use either \u0F62 or \u0F6A, but we won't confuse - // this ra for a ra-mgo, so we use \u0F62, EWC_ra, not + // you could use either \u0F62 or \u0F6A, but we won't confuse + // this ra for a ra-mgo, so we use \u0F62, EWC_ra, not // EWSUB_ra_btags. } @@ -694,7 +694,7 @@ public class LegalTshegBar { if (!isNominalRepresentationOfConsonant(rootLetter)) return internalThrowThing(throwIfIllegal, - "The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\\u0F6A)"); + "The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\u0F6A)"); if (EW_ABSENT != prefix) { // Ensure that this prefix is one of the five prefixes, @@ -780,7 +780,7 @@ public class LegalTshegBar "The head letter sa cannot be used with that root letter."); } } else { - // '\u0F6A' is not a valid head letter, even for + // '\u0F6A' is not a valid head letter, even for // "rnya". Use EWC_ra instead. return internalThrowThing(throwIfIllegal, "The head letter given is not valid."); @@ -950,7 +950,7 @@ public class LegalTshegBar ? "hasAChungOnRootLetter=\"true\"" : "") - // DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ? + // DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ? + ("vowel=\"" + (hasExplicitVowel() ? UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel()) diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java index 2d4346f..3cd7d7b 100644 --- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java +++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java @@ -30,11 +30,11 @@ public class UnicodeUtils { /** Returns true iff x is a Unicode character that represents a consonant or two-consonant stack that has a Unicode code point. Returns true only for the usual suspects (like - \u0F40) and for Sanskrit consonants (like - \u0F71) and the simple two-consonant stacks in - Unicode (like \u0F43). Returns false for, among + \u0F40) and for Sanskrit consonants (like + \u0F71) and the simple two-consonant stacks in + Unicode (like \u0F43). Returns false for, among other things, subjoined consonants like - \u0F90. */ + \u0F90. */ public static boolean isNonSubjoinedConsonant(char x) { return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */) && (x >= '\u0F40' && x <= '\u0F6A')); @@ -43,11 +43,11 @@ public class UnicodeUtils { /** Returns true iff x is a Unicode character that represents a subjoined consonant or subjoined two-consonant stack that has a Unicode code point. Returns true only for the usual - suspects (like \u0F90) and for Sanskrit - consonants (like \u0F9C) and the simple - two-consonant stacks in Unicode (like \u0FAC). + suspects (like \u0F90) and for Sanskrit + consonants (like \u0F9C) and the simple + two-consonant stacks in Unicode (like \u0FAC). Returns false for, among other things, non-subjoined - consonants like \u0F40. */ + consonants like \u0F40. */ public static boolean isSubjoinedConsonant(char x) { return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */) && (x >= '\u0F90' && x <= '\u0FBC')); @@ -56,11 +56,11 @@ public class UnicodeUtils { /** Returns true iff x is the preferred representation of a Tibetan or Sanskrit consonant and cannot be broken down any further. Returns false for, among other things, subjoined - consonants like \u0F90, two-component consonants - like \u0F43, and fixed-form consonants like - '\u0F6A'. The new consonants (for transcribing Chinese, I - believe) "\u0F55\u0F39" (which EWTS calls "fa"), - "\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are + consonants like \u0F90, two-component consonants + like \u0F43, and fixed-form consonants like + '\u0F6A'. The new consonants (for transcribing Chinese, I + believe) "\u0F55\u0F39" (which EWTS calls "fa"), + "\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are two-character sequences, but you should be aware of them also. */ public static boolean isPreferredFormOfConsonant(char x) { @@ -177,9 +177,9 @@ public class UnicodeUtils { /** Returns true iff ch corresponds to the Tibetan letter ra. Several Unicode characters correspond to the Tibetan letter ra (in its subscribed form or otherwise). Oftentimes, - \u0F62 is thought of as the nominal + \u0F62 is thought of as the nominal representation. Returns false for some characters that - contain ra but are not merely ra, such as \u0F77 */ + contain ra but are not merely ra, such as \u0F77 */ public static boolean isRa(char ch) { return ('\u0F62' == ch || '\u0F6A' == ch @@ -189,7 +189,7 @@ public class UnicodeUtils { /** Returns true iff ch corresponds to the Tibetan letter wa. Several Unicode characters correspond to the Tibetan letter - wa. Oftentimes, \u0F5D is thought of as the + wa. Oftentimes, \u0F5D is thought of as the nominal representation. */ public static boolean isWa(char ch) { return ('\u0F5D' == ch @@ -199,7 +199,7 @@ public class UnicodeUtils { /** Returns true iff ch corresponds to the Tibetan letter ya. Several Unicode characters correspond to the Tibetan letter - ya. Oftentimes, \u0F61 is thought of as the + ya. Oftentimes, \u0F61 is thought of as the nominal representation. */ public static boolean isYa(char ch) { return ('\u0F61' == ch @@ -209,7 +209,7 @@ public class UnicodeUtils { /** Returns true iff there exists at least one character ch in unicodeString such that ch {@link #isRa(char) is ra} or contains - ra (like \u0F77). This method is not implemented + ra (like \u0F77). This method is not implemented as fast as it could be. It calls on the canonicalization code in order to maximize reuse and minimize the possibility of coder error. */