Replaced the backslash in Unicode escapes with the HTML entity &#92; so that Unicode escape sequences appear correctly in javadocs.

This commit is contained in:
dchandler 2002-12-09 02:29:09 +00:00
parent 35425f3753
commit 2d6c8be804
2 changed files with 27 additions and 27 deletions

View file

@ -35,7 +35,7 @@ import org.thdl.util.ThdlDebug;
* <li>It contains no vocalic modifications</li> * <li>It contains no vocalic modifications</li>
* *
* <li>It may or may not contain an a-chung * <li>It may or may not contain an a-chung
* (<code>\u0F71</code>)</li> * (<code>&#92;u0F71</code>)</li>
* *
* <li>It contains at most one vowel from the set {EWV_a, EWV_i, * <li>It contains at most one vowel from the set {EWV_a, EWV_i,
* EWV_e, EWV_u}, and that vowel is on the root stack. The one * EWV_e, EWV_u}, and that vowel is on the root stack. The one
@ -44,7 +44,7 @@ import org.thdl.util.ThdlDebug;
* *
* <li>It has at most one suffix, which is a single consonant or the * <li>It has at most one suffix, which is a single consonant or the
* special connective case marker 'i (i.e., * special connective case marker 'i (i.e.,
* <code>"\u0F60\u0F72"</code>).</li> * <code>"&#92;u0F60&#92;u0F72"</code>).</li>
* *
* *
DLC FIXME: we must allow many suffixes. See Andres' e-mail below: DLC FIXME: we must allow many suffixes. See Andres' e-mail below:
@ -70,7 +70,7 @@ And also there are cases where they combine. For ex you can have
* *
* <li>It may contain a EWC_sa or EWC_da postsuffix iff there exists * <li>It may contain a EWC_sa or EWC_da postsuffix iff there exists
* a suffix (and a suffix that is not the special connective case * a suffix (and a suffix that is not the special connective case
* marker 'i (i.e., <code>"\u0F60\u0F72"</code>) (DLC FIXME: 'o and * marker 'i (i.e., <code>"&#92;u0F60&#92;u0F72"</code>) (DLC FIXME: 'o and
* 'am maybe? I asked in the "Embarrasing error in wylie conversion" * 'am maybe? I asked in the "Embarrasing error in wylie conversion"
* bug report.).</li> * bug report.).</li>
* *
@ -236,7 +236,7 @@ public class LegalTshegBar
} }
/** Returns null if there is no suffix, or a string containing the /** Returns null if there is no suffix, or a string containing the
* one consonant or a string <code>"\u0F60\u0F72"</code> * one consonant or a string <code>"&#92;u0F60&#92;u0F72"</code>
* containing two characters in the special case that the suffix * containing two characters in the special case that the suffix
* is that connective case marker {@link * is that connective case marker {@link
* #getConnectiveCaseSuffix()}. */ * #getConnectiveCaseSuffix()}. */
@ -335,8 +335,8 @@ public class LegalTshegBar
// DLC unit test that each EWC is a nominal form of a consonant // DLC unit test that each EWC is a nominal form of a consonant
// you could use either \u0F62 or \u0F6A, but we won't confuse // you could use either &#92;u0F62 or &#92;u0F6A, but we won't confuse
// this ra for a ra-mgo, so we use \u0F62, EWC_ra, not // this ra for a ra-mgo, so we use &#92;u0F62, EWC_ra, not
// EWSUB_ra_btags. // EWSUB_ra_btags.
} }
@ -694,7 +694,7 @@ public class LegalTshegBar
{ {
if (!isNominalRepresentationOfConsonant(rootLetter)) if (!isNominalRepresentationOfConsonant(rootLetter))
return internalThrowThing(throwIfIllegal, return internalThrowThing(throwIfIllegal,
"The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\\u0F6A)"); "The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (&#92;u0F6A)");
if (EW_ABSENT != prefix) { if (EW_ABSENT != prefix) {
// Ensure that this prefix is one of the five prefixes, // Ensure that this prefix is one of the five prefixes,
@ -780,7 +780,7 @@ public class LegalTshegBar
"The head letter sa cannot be used with that root letter."); "The head letter sa cannot be used with that root letter.");
} }
} else { } else {
// '\u0F6A' is not a valid head letter, even for // '&#92;u0F6A' is not a valid head letter, even for
// "rnya". Use EWC_ra instead. // "rnya". Use EWC_ra instead.
return internalThrowThing(throwIfIllegal, return internalThrowThing(throwIfIllegal,
"The head letter given is not valid."); "The head letter given is not valid.");
@ -950,7 +950,7 @@ public class LegalTshegBar
? "hasAChungOnRootLetter=\"true\"" ? "hasAChungOnRootLetter=\"true\""
: "") : "")
// DLC NOW: what about the root letter a, i.e. \u0F68 ? do we want the EWTS to be 'aa' ? // DLC NOW: what about the root letter a, i.e. &#92;u0F68 ? do we want the EWTS to be 'aa' ?
+ ("vowel=\"" + ("vowel=\""
+ (hasExplicitVowel() + (hasExplicitVowel()
? UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel()) ? UnicodeCharToExtendedWylie.getExtendedWylieForUnicodeChar(getVowel())

View file

@ -30,11 +30,11 @@ public class UnicodeUtils {
/** Returns true iff x is a Unicode character that represents a /** Returns true iff x is a Unicode character that represents a
consonant or two-consonant stack that has a Unicode code consonant or two-consonant stack that has a Unicode code
point. Returns true only for the usual suspects (like point. Returns true only for the usual suspects (like
<code>\u0F40</code>) and for Sanskrit consonants (like <code>&#92;u0F40</code>) and for Sanskrit consonants (like
<code>\u0F71</code>) and the simple two-consonant stacks in <code>&#92;u0F71</code>) and the simple two-consonant stacks in
Unicode (like <code>\u0F43</code>). Returns false for, among Unicode (like <code>&#92;u0F43</code>). Returns false for, among
other things, subjoined consonants like other things, subjoined consonants like
<code>\u0F90</code>. */ <code>&#92;u0F90</code>. */
public static boolean isNonSubjoinedConsonant(char x) { public static boolean isNonSubjoinedConsonant(char x) {
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */) return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F40' && x <= '\u0F6A')); && (x >= '\u0F40' && x <= '\u0F6A'));
@ -43,11 +43,11 @@ public class UnicodeUtils {
/** Returns true iff x is a Unicode character that represents a /** Returns true iff x is a Unicode character that represents a
subjoined consonant or subjoined two-consonant stack that has subjoined consonant or subjoined two-consonant stack that has
a Unicode code point. Returns true only for the usual a Unicode code point. Returns true only for the usual
suspects (like <code>\u0F90</code>) and for Sanskrit suspects (like <code>&#92;u0F90</code>) and for Sanskrit
consonants (like <code>\u0F9C</code>) and the simple consonants (like <code>&#92;u0F9C</code>) and the simple
two-consonant stacks in Unicode (like <code>\u0FAC</code>). two-consonant stacks in Unicode (like <code>&#92;u0FAC</code>).
Returns false for, among other things, non-subjoined Returns false for, among other things, non-subjoined
consonants like <code>\u0F40</code>. */ consonants like <code>&#92;u0F40</code>. */
public static boolean isSubjoinedConsonant(char x) { public static boolean isSubjoinedConsonant(char x) {
return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */) return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F90' && x <= '\u0FBC')); && (x >= '\u0F90' && x <= '\u0FBC'));
@ -56,11 +56,11 @@ public class UnicodeUtils {
/** Returns true iff x is the preferred representation of a /** Returns true iff x is the preferred representation of a
Tibetan or Sanskrit consonant and cannot be broken down any Tibetan or Sanskrit consonant and cannot be broken down any
further. Returns false for, among other things, subjoined further. Returns false for, among other things, subjoined
consonants like <code>\u0F90</code>, two-component consonants consonants like <code>&#92;u0F90</code>, two-component consonants
like <code>\u0F43</code>, and fixed-form consonants like like <code>&#92;u0F43</code>, and fixed-form consonants like
'\u0F6A'. The new consonants (for transcribing Chinese, I '&#92;u0F6A'. The new consonants (for transcribing Chinese, I
believe) "\u0F55\u0F39" (which EWTS calls "fa"), believe) "&#92;u0F55&#92;u0F39" (which EWTS calls "fa"),
"\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are "&#92;u0F56&#92;u0F39" ("va"), and "&#92;u0F5F&#92;u0F39" ("Dza") are
two-character sequences, but you should be aware of them two-character sequences, but you should be aware of them
also. */ also. */
public static boolean isPreferredFormOfConsonant(char x) { public static boolean isPreferredFormOfConsonant(char x) {
@ -177,9 +177,9 @@ public class UnicodeUtils {
/** Returns true iff ch corresponds to the Tibetan letter ra. /** Returns true iff ch corresponds to the Tibetan letter ra.
Several Unicode characters correspond to the Tibetan letter ra Several Unicode characters correspond to the Tibetan letter ra
(in its subscribed form or otherwise). Oftentimes, (in its subscribed form or otherwise). Oftentimes,
<code>\u0F62</code> is thought of as the nominal <code>&#92;u0F62</code> is thought of as the nominal
representation. Returns false for some characters that representation. Returns false for some characters that
contain ra but are not merely ra, such as <code>\u0F77</code> */ contain ra but are not merely ra, such as <code>&#92;u0F77</code> */
public static boolean isRa(char ch) { public static boolean isRa(char ch) {
return ('\u0F62' == ch return ('\u0F62' == ch
|| '\u0F6A' == ch || '\u0F6A' == ch
@ -189,7 +189,7 @@ public class UnicodeUtils {
/** Returns true iff ch corresponds to the Tibetan letter wa. /** Returns true iff ch corresponds to the Tibetan letter wa.
Several Unicode characters correspond to the Tibetan letter Several Unicode characters correspond to the Tibetan letter
wa. Oftentimes, <code>\u0F5D</code> is thought of as the wa. Oftentimes, <code>&#92;u0F5D</code> is thought of as the
nominal representation. */ nominal representation. */
public static boolean isWa(char ch) { public static boolean isWa(char ch) {
return ('\u0F5D' == ch return ('\u0F5D' == ch
@ -199,7 +199,7 @@ public class UnicodeUtils {
/** Returns true iff ch corresponds to the Tibetan letter ya. /** Returns true iff ch corresponds to the Tibetan letter ya.
Several Unicode characters correspond to the Tibetan letter Several Unicode characters correspond to the Tibetan letter
ya. Oftentimes, <code>\u0F61</code> is thought of as the ya. Oftentimes, <code>&#92;u0F61</code> is thought of as the
nominal representation. */ nominal representation. */
public static boolean isYa(char ch) { public static boolean isYa(char ch) {
return ('\u0F61' == ch return ('\u0F61' == ch
@ -209,7 +209,7 @@ public class UnicodeUtils {
/** Returns true iff there exists at least one character ch in /** Returns true iff there exists at least one character ch in
unicodeString such that ch {@link #isRa(char) is ra} or contains unicodeString such that ch {@link #isRa(char) is ra} or contains
ra (like <code>\u0F77</code>). This method is not implemented ra (like <code>&#92;u0F77</code>). This method is not implemented
as fast as it could be. It calls on the canonicalization code as fast as it could be. It calls on the canonicalization code
in order to maximize reuse and minimize the possibility of in order to maximize reuse and minimize the possibility of
coder error. */ coder error. */