diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java index 5ba7bd7..aabc790 100644 --- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java +++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java @@ -35,7 +35,7 @@ import org.thdl.util.ThdlDebug; *
\u0F71
)\u0F71
)
*
* "\u0F60\u0F72"
)."\u0F60\u0F72"
).
*
*
DLC FIXME: we must allow many suffixes. See Andres' e-mail below:
@@ -70,7 +70,7 @@ And also there are cases where they combine. For ex you can have
*
* "\u0F60\u0F72"
) (DLC FIXME: 'o and
+ * marker 'i (i.e., "\u0F60\u0F72"
) (DLC FIXME: 'o and
* 'am maybe? I asked in the "Embarrasing error in wylie conversion"
* bug report.)."\u0F60\u0F72"
+ * one consonant or a string "\u0F60\u0F72"
* containing two characters in the special case that the suffix
* is that connective case marker {@link
* #getConnectiveCaseSuffix()}. */
@@ -335,8 +335,8 @@ public class LegalTshegBar
// DLC unit test that each EWC is a nominal form of a consonant
- // you could use either \u0F62 or \u0F6A, but we won't confuse
- // this ra for a ra-mgo, so we use \u0F62, EWC_ra, not
+ // you could use either \u0F62 or \u0F6A, but we won't confuse
+ // this ra for a ra-mgo, so we use \u0F62, EWC_ra, not
// EWSUB_ra_btags.
}
@@ -694,7 +694,7 @@ public class LegalTshegBar
{
if (!isNominalRepresentationOfConsonant(rootLetter))
return internalThrowThing(throwIfIllegal,
- "The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\\u0F6A)");
+ "The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\u0F6A)");
if (EW_ABSENT != prefix) {
// Ensure that this prefix is one of the five prefixes,
@@ -780,7 +780,7 @@ public class LegalTshegBar
"The head letter sa cannot be used with that root letter.");
}
} else {
- // '\u0F6A' is not a valid head letter, even for
+ // '\u0F6A' is not a valid head letter, even for
// "rnya". Use EWC_ra instead.
return internalThrowThing(throwIfIllegal,
"The head letter given is not valid.");
@@ -950,7 +950,7 @@ public class LegalTshegBar
? "hasAChungOnRootLetter=\"true\""
: "")
- // DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ?
+ // DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ?
+ ("vowel=\""
+ (hasExplicitVowel()
? UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel())
diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
index 2d4346f..3cd7d7b 100644
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@@ -30,11 +30,11 @@ public class UnicodeUtils {
/** Returns true iff x is a Unicode character that represents a
consonant or two-consonant stack that has a Unicode code
point. Returns true only for the usual suspects (like
- \u0F40
) and for Sanskrit consonants (like
- \u0F71
) and the simple two-consonant stacks in
- Unicode (like \u0F43
). Returns false for, among
+ \u0F40
) and for Sanskrit consonants (like
+ \u0F71
) and the simple two-consonant stacks in
+ Unicode (like \u0F43
). Returns false for, among
other things, subjoined consonants like
- \u0F90
. */
+ \u0F90
. */
public static boolean isNonSubjoinedConsonant(char x) {
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F40' && x <= '\u0F6A'));
@@ -43,11 +43,11 @@ public class UnicodeUtils {
/** Returns true iff x is a Unicode character that represents a
subjoined consonant or subjoined two-consonant stack that has
a Unicode code point. Returns true only for the usual
- suspects (like \u0F90
) and for Sanskrit
- consonants (like \u0F9C
) and the simple
- two-consonant stacks in Unicode (like \u0FAC
).
+ suspects (like \u0F90
) and for Sanskrit
+ consonants (like \u0F9C
) and the simple
+ two-consonant stacks in Unicode (like \u0FAC
).
Returns false for, among other things, non-subjoined
- consonants like \u0F40
. */
+ consonants like \u0F40
. */
public static boolean isSubjoinedConsonant(char x) {
return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F90' && x <= '\u0FBC'));
@@ -56,11 +56,11 @@ public class UnicodeUtils {
/** Returns true iff x is the preferred representation of a
Tibetan or Sanskrit consonant and cannot be broken down any
further. Returns false for, among other things, subjoined
- consonants like \u0F90
, two-component consonants
- like \u0F43
, and fixed-form consonants like
- '\u0F6A'. The new consonants (for transcribing Chinese, I
- believe) "\u0F55\u0F39" (which EWTS calls "fa"),
- "\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are
+ consonants like \u0F90
, two-component consonants
+ like \u0F43
, and fixed-form consonants like
+ '\u0F6A'. The new consonants (for transcribing Chinese, I
+ believe) "\u0F55\u0F39" (which EWTS calls "fa"),
+ "\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are
two-character sequences, but you should be aware of them
also. */
public static boolean isPreferredFormOfConsonant(char x) {
@@ -177,9 +177,9 @@ public class UnicodeUtils {
/** Returns true iff ch corresponds to the Tibetan letter ra.
Several Unicode characters correspond to the Tibetan letter ra
(in its subscribed form or otherwise). Oftentimes,
- \u0F62
is thought of as the nominal
+ \u0F62
is thought of as the nominal
representation. Returns false for some characters that
- contain ra but are not merely ra, such as \u0F77
*/
+ contain ra but are not merely ra, such as \u0F77
*/
public static boolean isRa(char ch) {
return ('\u0F62' == ch
|| '\u0F6A' == ch
@@ -189,7 +189,7 @@ public class UnicodeUtils {
/** Returns true iff ch corresponds to the Tibetan letter wa.
Several Unicode characters correspond to the Tibetan letter
- wa. Oftentimes, \u0F5D
is thought of as the
+ wa. Oftentimes, \u0F5D
is thought of as the
nominal representation. */
public static boolean isWa(char ch) {
return ('\u0F5D' == ch
@@ -199,7 +199,7 @@ public class UnicodeUtils {
/** Returns true iff ch corresponds to the Tibetan letter ya.
Several Unicode characters correspond to the Tibetan letter
- ya. Oftentimes, \u0F61
is thought of as the
+ ya. Oftentimes, \u0F61
is thought of as the
nominal representation. */
public static boolean isYa(char ch) {
return ('\u0F61' == ch
@@ -209,7 +209,7 @@ public class UnicodeUtils {
/** Returns true iff there exists at least one character ch in
unicodeString such that ch {@link #isRa(char) is ra} or contains
- ra (like \u0F77
). This method is not implemented
+ ra (like \u0F77
). This method is not implemented
as fast as it could be. It calls on the canonicalization code
in order to maximize reuse and minimize the possibility of
coder error. */