diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java index 35950ac..13a0e72 100644 --- a/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java +++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBar.java @@ -100,13 +100,11 @@ And also there are cases where they combine. For ex you can have * * *
Note that this class uses only a subset of Unicode to represent - * consonants and vowels. In some situations, you should use {@link - * #EWSUB_wa_zur} to represent the consonant wa, while in others you - * should use {@link #EWC_wa}, even though you mean to subscribe a - * fixed-form wa. Basically, stick to the codepoints for which - * enumerations exist in {@link - * org.thdl.tib.text.tshegbar.UnicodeConstants} and use your common - * sense.
+ * consonants and vowels. You should always use the nominal form of + * a letter, e.g. {@link #EWC_wa}, not {@link #EWSUB_wa_zur}, to + * represent letters. (What if you mean to subscribe a fixed-form + * wa? Well, that's not a legal tsheg-bar, so you don't mean to do + * that.) * *For a pretty good, concise summary of the rules this class
* knows about, see Joe B. Wilson's Translating Buddhism from
@@ -142,8 +140,6 @@ public class LegalTshegBar
/** Do not use this constructor. */
private LegalTshegBar() { super(); }
- // DLC FIXME: do we want to accept EWC_ra or EWSUB_ra_btags for
- // the root letter, even if there is no head letter? Etc.
/** Constructs a valid Tibetan syllable or throws an exception.
* Use EW_ABSENT (or null in the case of suffix
) for
* those parts of the syllable that are absent. The root letter
@@ -180,7 +176,7 @@ public class LegalTshegBar
// copying is slightly inefficient because it is unnecessary
// since Java strings are read-only, but translating this code
// to C++ is easier this way.
- this.suffix = new String(suffix);
+ this.suffix = (suffix == null) ? null : new String(suffix);
this.postsuffix = postsuffix;
this.vowel = vowel;
@@ -198,7 +194,8 @@ public class LegalTshegBar
throws IllegalArgumentException
{
this(prefix, headLetter, rootLetter, subjoinedLetter,
- hasWaZur, hasAChung, new String(new char[] { suffix }),
+ hasWaZur, hasAChung,
+ (suffix == EW_ABSENT) ? null : new String(new char[] { suffix }),
postsuffix, vowel);
}
@@ -216,7 +213,10 @@ public class LegalTshegBar
}
/** Returns the non-EWSUB_wa_zur consonant subscribed to the root
- * consonant, or EW_ABSENT if none is. If you want to know if there is a wa-zur, use {@link #hasWaZurSubjoinedToRootLetter()}*/
+ * consonant, or EW_ABSENT if none is. If you want to know if
+ * there is a wa-zur, use {@link
+ * #hasWaZurSubjoinedToRootLetter()}. This returns EWC_ra, not
+ * EWSUB_ra_btags, etc. */
public char getSubjoinedLetter() {
return subjoinedLetter;
}
@@ -458,11 +458,11 @@ public class LegalTshegBar
if (EW_ABSENT == subjoinedLetter) {
return isConsonantThatTakesWaZur(rootLetter);
}
- if (EWSUB_ra_btags == subjoinedLetter) {
+ if (EWC_ra == subjoinedLetter) {
if (EWC_ga == rootLetter
|| EWC_da == rootLetter)
return true;
- } else if (EWSUB_ya_btags == subjoinedLetter) {
+ } else if (EWC_ya == subjoinedLetter) {
if (EWC_pha == rootLetter)
return true;
}
@@ -599,6 +599,9 @@ public class LegalTshegBar
* this is {@link #getConnectiveCaseSuffix()}
* @param postsuffix the optional postsuffix, which should be
* EWC_sa or EWC_da
+ * @param errorBuffer if non-null, and if this method returns
+ * false, then the reason that this is not a legal tsheg-bar will
+ * be appended to errorBuffer.
* @param vowel the optional vowel */
public static boolean formsLegalTshegBar(char prefix,
char headLetter,
@@ -608,12 +611,14 @@ public class LegalTshegBar
boolean hasAChung,
String suffix,
char postsuffix,
- char vowel)
+ char vowel,
+ StringBuffer errorBuffer)
{
try {
return internalLegalityTest(prefix, headLetter, rootLetter,
subjoinedLetter, hasWaZur, hasAChung,
- suffix, postsuffix, vowel, false);
+ suffix, postsuffix, vowel, false,
+ errorBuffer);
} catch (IllegalArgumentException e) {
throw new Error("This simply cannot happen, but it did.");
}
@@ -631,12 +636,15 @@ public class LegalTshegBar
boolean hasAChung,
char suffix,
char postsuffix,
- char vowel)
+ char vowel,
+ StringBuffer errorBuffer)
{
return formsLegalTshegBar(prefix, headLetter, rootLetter,
subjoinedLetter, hasWaZur, hasAChung,
- new String(new char[] { suffix }),
- postsuffix, vowel);
+ ((suffix == EW_ABSENT)
+ ? null
+ : new String(new char[] { suffix })),
+ postsuffix, vowel, errorBuffer);
}
@@ -659,12 +667,17 @@ public class LegalTshegBar
{
internalLegalityTest(prefix, headLetter, rootLetter,
subjoinedLetter, hasWaZur, hasAChung,
- suffix, postsuffix, vowel, true);
+ suffix, postsuffix, vowel, true, null);
}
/** Voodoo. Stand back. */
- private static boolean internalThrowThing(boolean doThrow, String msg)
+ private static boolean internalThrowThing(boolean doThrow,
+ StringBuffer errorBuf,
+ String msg)
{
+ if (errorBuf != null) {
+ errorBuf.append(msg);
+ }
if (doThrow)
throw new IllegalArgumentException(msg);
return false;
@@ -674,6 +687,8 @@ public class LegalTshegBar
* thrown, then this combination makes a legal Tibetan syllable.
* To learn about the arguments, see {@link
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}.
+ * @param errorBuf if non-null and this syllable is illegal, the
+ * reason it is illegal will be appended here
* @return true if this syllable is legal, false if this syllable
* is illegal and throwIfIllegal is false, does not return if
* this syllable is illegal and throwIfIllegal is true
@@ -689,11 +704,13 @@ public class LegalTshegBar
String suffix,
char postsuffix,
char vowel,
- boolean throwIfIllegal)
+ boolean throwIfIllegal,
+ StringBuffer errorBuf)
throws IllegalArgumentException
{
if (!isNominalRepresentationOfConsonant(rootLetter))
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\u0F6A)");
if (EW_ABSENT != prefix) {
@@ -701,28 +718,34 @@ public class LegalTshegBar
// and that it can go with this root letter:
if (!isNominalRepresentationOfPrefix(prefix))
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The prefix is not absent, so it must be one of the five possible prefixes.");
// DLC test that it can go with the root letter.
}
if (EW_ABSENT != subjoinedLetter) {
- if (EWSUB_ya_btags == subjoinedLetter) {
+ if (EWC_ya == subjoinedLetter) {
if (!isConsonantThatTakesYaBtags(rootLetter)) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"Cannot subscribe ya-btags to that root letter.");
}
- } else if (EWSUB_ra_btags == subjoinedLetter) {
+ } else if (EWC_ra == subjoinedLetter) {
if (!isConsonantThatTakesRaBtags(rootLetter)) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"Cannot subscribe ra-btags to that root letter.");
}
- } else if (EWSUB_la_btags == subjoinedLetter) {
+ } else if (EWC_la == subjoinedLetter) {
if (!isConsonantThatTakesLaBtags(rootLetter)) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"Cannot subscribe la-btags to that root letter.");
}
- } else if (EWSUB_wa_zur == subjoinedLetter) {
- throw new Error("DLC FIXME: can this happen? wa-zur comes in via the boolean argument hasWaZur, not via subjoinedLetter.");
+ } else if (EWC_wa == subjoinedLetter) {
+ return internalThrowThing(throwIfIllegal,
+ errorBuf,
+ "The presence of wa-zur must be specified via a boolean parameter.");
} else {
// check for a common mistake:
if ('\u0FBA' == subjoinedLetter
@@ -730,9 +753,11 @@ public class LegalTshegBar
|| '\u0FBC' == subjoinedLetter)
{
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The subjoined letter given is subjoinable, but you gave the fixed-form variant, which is not used in Tibetan syllables but is sometimes used in Tibetan transliteration of Sanskrit, Chinese, or some non-Tibetan language.");
}
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The subjoined letter given is not one of the four consonants that may be subscribed.");
}
} // subjoinedLetter tests
@@ -743,10 +768,12 @@ public class LegalTshegBar
if (!getConnectiveCaseSuffix().equals(suffix)) {
if (suffix.length() != 1) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"Illegal suffix -- not one of the legal complex suffixes like 'u, 'o, 'i, 'am.");
}
if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"Illegal suffix -- not one of the ten legal suffixes: "
+ UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
}
@@ -755,6 +782,7 @@ public class LegalTshegBar
if (EW_ABSENT != postsuffix) {
if (null == suffix)
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"You cannot have a postsuffix unless you also have a suffix.");
}
@@ -762,11 +790,13 @@ public class LegalTshegBar
if (EWC_ra == headLetter) {
if (!isConsonantThatTakesRaMgo(rootLetter)) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The head letter ra cannot be used with that root letter.");
}
} else if (EWC_la == headLetter) {
if (!isConsonantThatTakesLaMgo(rootLetter)) {
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The head letter la cannot be used with that root letter.");
}
} else if (EWC_sa == headLetter) {
@@ -774,15 +804,18 @@ public class LegalTshegBar
// handle a common error specially:
if (EWC_la == rootLetter)
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"sa cannot be a head letter atop the root letter la. You probably meant to have sa the root letter and la the subjoined letter.");
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The head letter sa cannot be used with that root letter.");
}
} else {
// '\u0F6A' is not a valid head letter, even for
// "rnya". Use EWC_ra instead.
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The head letter given is not valid.");
}
} // headLetter tests
@@ -796,16 +829,20 @@ public class LegalTshegBar
{
if (EWC_achen == vowel)
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The vowel given is not valid. Use EW_ABSENT for the EWC_achen sound.");
if ('\u0F71' == vowel)
return internalThrowThing(throwIfIllegal,
- "a-chung cannot be used in a simple Tibetan syllable.");
+ errorBuf,
+ "a-chung cannot be used in a simple Tibetan syllable."); // DLC FIXME: what about pA?
return internalThrowThing(throwIfIllegal,
+ errorBuf,
"The vowel given is not valid.");
}
}
// Phew. We got here, so this combination of inputs is valid.
+ // Do nothing to errorBuf.
return true;
}
diff --git a/source/org/thdl/tib/text/tshegbar/LegalTshegBarTest.java b/source/org/thdl/tib/text/tshegbar/LegalTshegBarTest.java
index fc4a0c1..1749648 100644
--- a/source/org/thdl/tib/text/tshegbar/LegalTshegBarTest.java
+++ b/source/org/thdl/tib/text/tshegbar/LegalTshegBarTest.java
@@ -40,29 +40,123 @@ public class LegalTshegBarTest extends TestCase implements UnicodeConstants {
/** Tests the getThdlWylie() method and one of the constructors. */
public void testGetThdlWylie() {
- assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWSUB_ra_btags,
+ assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWC_ra,
false, true, EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrAols"));
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
- EWSUB_ra_btags, true, true,
+ EWC_ra, true, true,
EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrwAols"));
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
- EWSUB_ra_btags, false, false,
+ EWC_ra, false, false,
EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrols"));
+ assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_ta,
+ EW_ABSENT, false, false,
+ EWC_nga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("btang"));
+
+ // dga and dag are fun, as both are represented by "\u0F51\u0F42":
+ {
+ assertTrue(new LegalTshegBar(EWC_da, EW_ABSENT, EWC_ga,
+ EW_ABSENT, false, false,
+ EW_ABSENT, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("dga"));
+ assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_da,
+ EW_ABSENT, false, false,
+ EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("dag"));
+ }
+
+ assertTrue(new LegalTshegBar(EW_ABSENT, EWC_ra, EWC_da,
+ EW_ABSENT, false, false,
+ EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("rdag"));
+ assertTrue(new LegalTshegBar(EWC_ba, EWC_ra, EWC_da,
+ EW_ABSENT, false, false,
+ EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("brdag"));
+
+ assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_nga,
+ EW_ABSENT, false, false,
+ "\u0F60\u0F72", EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("nga'i"));
+
+ assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_nga,
+ EW_ABSENT, false, false,
+ null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("nga"));
+
+ assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_sa,
+ EWC_la, false, false,
+ null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("sla"));
+
+ {
+ boolean threw = false;
+ try {
+ new LegalTshegBar(EW_ABSENT, EWC_sa, EWC_la,
+ EW_ABSENT, false, false,
+ null, EW_ABSENT, EW_ABSENT);
+ } catch (IllegalArgumentException e) {
+ threw = true;
+ }
+ assertTrue(threw);
+ }
}
/** Tests the formsLegalTshegBar(..) method. DLC FIXME: but
* doesn't test it very well. */
public void testFormsLegalTshegBar() {
+ StringBuffer eb = new StringBuffer();
+
// Ensure that EWTS's jskad is not legal:
- assertTrue(!LegalTshegBar.formsLegalTshegBar(EWC_ja, EWC_sa,
- EWC_ka, EW_ABSENT,
- false, false,
- EW_ABSENT, EWC_da,
- EW_ABSENT));
+ {
+ assertTrue(!LegalTshegBar.formsLegalTshegBar(EWC_ja, EWC_sa,
+ EWC_ka, EW_ABSENT,
+ false, false,
+ EW_ABSENT, EWC_da,
+ EW_ABSENT, eb));
+ }
+
assertTrue(LegalTshegBar.formsLegalTshegBar(EWC_ba, EW_ABSENT,
EWC_ta, EW_ABSENT,
false, false,
EWC_da, EW_ABSENT,
- EW_ABSENT));
+ EW_ABSENT, eb));
+
+ // test that there's only one way to make dwa:
+ assertTrue(!LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
+ EWC_da, EWSUB_wa_zur,
+ false, false,
+ EW_ABSENT, EW_ABSENT,
+ EW_ABSENT, eb));
+ assertTrue(!LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
+ EWC_da, EWC_wa,
+ false, false,
+ EW_ABSENT, EW_ABSENT,
+ EW_ABSENT, eb));
+ boolean result
+ = LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
+ EWC_da, EW_ABSENT,
+ true, false,
+ EW_ABSENT, EW_ABSENT,
+ EW_ABSENT, eb);
+ assertTrue(eb.toString(), result);
+ }
+
+ /** Tests that both constructors (char suffix, then String suffix) throw IllegalArgumentException when given EWSUB_ra_btags as the subjoined letter. */
+ public void testConstructors() {
+ boolean x;
+
+ x = false; // set to true only if the constructor throws
+ try {
+ new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
+ EWSUB_ra_btags, false, false,
+ EWC_la, EWC_sa, EWV_o);
+ } catch (IllegalArgumentException e) {
+ x = true;
+ }
+ assertTrue(x);
+
+ x = false; // same arguments again, but with the suffix passed as a String
+ try {
+ new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
+ EWSUB_ra_btags, false, false,
+ new String(new char[] { EWC_la }), EWC_sa,
+ EWV_o);
+ } catch (IllegalArgumentException e) {
+ x = true;
+ }
+ assertTrue(x);
}
}
diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
index 3df96fb..c23a60a 100644
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtils.java
@@ -30,11 +30,11 @@ public class UnicodeUtils implements UnicodeConstants {
/** Returns true iff x is a Unicode codepoint that represents a
consonant or two-consonant stack that has a Unicode code
point. Returns true only for the usual suspects (like
- \u0F40
) and for Sanskrit consonants (like
- \u0F71
) and the simple two-consonant stacks in
- Unicode (like \u0F43
). Returns false for, among
+ U+0F40
) and for Sanskrit consonants (like
+ U+0F71
) and the simple two-consonant stacks in
+ Unicode (like U+0F43
). Returns false for, among
other things, subjoined consonants like
- \u0F90
. */
+ U+0F90
. */
public static boolean isNonSubjoinedConsonant(char x) {
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F40' && x <= '\u0F6A'));
@@ -43,11 +43,11 @@ public class UnicodeUtils implements UnicodeConstants {
/** Returns true iff x is a Unicode codepoint that represents a
subjoined consonant or subjoined two-consonant stack that has
a Unicode code point. Returns true only for the usual
- suspects (like \u0F90
) and for Sanskrit
- consonants (like \u0F9C
) and the simple
- two-consonant stacks in Unicode (like \u0FAC
).
+ suspects (like U+0F90
) and for Sanskrit
+ consonants (like U+0F9C
) and the simple
+ two-consonant stacks in Unicode (like U+0FAC
).
Returns false for, among other things, non-subjoined
- consonants like \u0F40
. */
+ consonants like U+0F40
. */
public static boolean isSubjoinedConsonant(char x) {
return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F90' && x <= '\u0FBC'));
@@ -56,13 +56,13 @@ public class UnicodeUtils implements UnicodeConstants {
/** Returns true iff x is the preferred representation of a
Tibetan or Sanskrit consonant and cannot be broken down any
further. Returns false for, among other things, subjoined
- consonants like \u0F90
, two-component consonants
- like \u0F43
, and fixed-form consonants like
- '\u0F6A'. The new consonants (for transcribing Chinese, I
- believe) "\u0F55\u0F39" (which EWTS calls "fa"),
- "\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are
- two-codepoint sequences, but you should be aware of them
- also. */
+ consonants like U+0F90
, two-component consonants
+ like U+0F43
, and fixed-form consonants like
+ U+0F6A
. The new consonants (for transcribing
+ Chinese, I believe) "\u0F55\u0F39" (which EWTS calls
+ "fa"), "\u0F56\u0F39" ("va"), and
+ "\u0F5F\u0F39" ("Dza") are two-codepoint sequences,
+ but you should be aware of them also. */
public static boolean isPreferredFormOfConsonant(char x) {
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
&& (x >= '\u0F40' && x <= '\u0F68')
@@ -97,7 +97,7 @@ public class UnicodeUtils implements UnicodeConstants {
Unicode codepoints, into either Normalization Form KD (NFKD),
D (NFD), or THDL (NFTHDL), depending on the value of normForm.
NFD and NFKD are specified by Unicode 3.2; NFTHDL is needed
- for {@link org.thdl.tib.text.tshegbar#UnicodeGraphemeCluster}
+ for {@link org.thdl.tib.text.tshegbar.UnicodeGraphemeCluster}
because NFKD normalizes U+0F0C
and neither NFD
nor NFKD breaks down U+0F00
into its constituent
codepoints. NFTHDL uses a maximum of codepoints, and it never
@@ -247,7 +247,7 @@ public class UnicodeUtils implements UnicodeConstants {
/** Returns true iff ch corresponds to the Tibetan letter wa.
Several Unicode codepoints correspond to the Tibetan letter
- wa. Oftentimes, \u0F5D
is thought of as the
+ wa. Oftentimes, U+0F5D
is thought of as the
nominal representation. */
public static boolean isWa(char ch) {
return ('\u0F5D' == ch
@@ -257,7 +257,7 @@ public class UnicodeUtils implements UnicodeConstants {
/** Returns true iff ch corresponds to the Tibetan letter ya.
Several Unicode codepoints correspond to the Tibetan letter
- ya. Oftentimes, \u0F61
is thought of as the
+ ya. Oftentimes, U+0F61
is thought of as the
nominal representation. */
public static boolean isYa(char ch) {
return ('\u0F61' == ch
@@ -267,7 +267,7 @@ public class UnicodeUtils implements UnicodeConstants {
/** Returns true iff there exists at least one codepoint cp in
unicodeString such that cp {@link #isRa(char) is ra} or contains
- ra (like \u0F77
). This method is not implemented
+ ra (like U+0F77
). This method is not implemented
as fast as it could be. It calls on the canonicalization code
in order to maximize reuse and minimize the possibility of
coder error. */
@@ -298,6 +298,9 @@ public class UnicodeUtils implements UnicodeConstants {
return "\\u" + Integer.toHexString((int)cp);
}
+ /**
+ * Returns a human-readable, ASCII form of the String s of Unicode
+ * codepoints. */
public static String unicodeStringToString(String s) {
StringBuffer sb = new StringBuffer(s.length() * 6);
for (int i = 0; i < s.length(); i++) {
diff --git a/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java b/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
index 8527867..309869d 100644
--- a/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
+++ b/source/org/thdl/tib/text/tshegbar/UnicodeUtilsTest.java
@@ -40,10 +40,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
}
/** Tests Unicode Normalization form KD for Tibetan codepoints.
- See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
- contains all test cases for
- U+0F00
-U+0FFF
there, and a few
- more. */
+ * See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
+ * contains all test cases for
+ * U+0F00
-U+0FFF
there, and a few more.
+ * Tests both {@link
+ * UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
+ * {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
+ * byte)}.*/
public void testMostlyNFKD() {
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFKD).equals("\u0F0B"));
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFKD).equals("\u0F40"));
@@ -112,10 +115,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
}
/** Tests Unicode Normalization form D for Tibetan codepoints.
- See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
- contains all test cases for
- U+0F00
-U+0FFF
there, and a few
- more. */
+ * See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
+ * contains all test cases for
+ * U+0F00
-U+0FFF
there, and a few more.
+ * Tests both {@link
+ * UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
+ * {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
+ * byte)}.*/
public void testMostlyNFD() {
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFD).equals("\u0F0B"));
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFD).equals("\u0F40"));
@@ -184,10 +190,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
}
/** Tests Unicode Normalization form THDL for Tibetan codepoints.
- See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
- contains all test cases for
- U+0F00
-U+0FFF
there, and a few
- more. */
+ * See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
+ * contains all test cases for
+ * U+0F00
-U+0FFF
there, and a few more.
+ * Tests both {@link
+ * UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
+ * {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
+ * byte)}. */
public void testMostlyNFTHDL() {
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFTHDL).equals("\u0F0B"));
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFTHDL).equals("\u0F40"));
@@ -253,10 +262,36 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F79", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F81", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F71\u0F80", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
+
+
+ assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("", NORM_NFTHDL).equals(""));
+
+ {
+ StringBuffer sb = new StringBuffer("\u0FAC");
+ UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
+ assertTrue(sb.toString().equals("\u0FAB\u0FB7"));
+ }
+ {
+ StringBuffer sb = new StringBuffer("\u0F66");
+ UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
+ assertTrue(sb.toString().equals("\u0F66"));
+ }
+ {
+ StringBuffer sb = new StringBuffer("");
+ UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
+ assertTrue(sb.toString().equals(""));
+ }
}
/** Tests the containsRa method. */
public void testContainsRa() {
+ assertTrue(!UnicodeUtils.containsRa('\u0F69'));
+ assertTrue(!UnicodeUtils.containsRa('\u0FB1'));
+ assertTrue(!UnicodeUtils.containsRa('\u0F48'));
+ assertTrue(!UnicodeUtils.containsRa('\u0060'));
+ assertTrue(!UnicodeUtils.containsRa('\uFFFF'));
+ assertTrue(!UnicodeUtils.containsRa('\uFFFF'));
+
assertTrue(UnicodeUtils.containsRa('\u0FB2'));
assertTrue(UnicodeUtils.containsRa('\u0F77'));
assertTrue(UnicodeUtils.containsRa('\u0F76'));
@@ -264,4 +299,84 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
assertTrue(UnicodeUtils.containsRa('\u0F62'));
assertTrue(UnicodeUtils.containsRa('\u0FBC'));
}
+
+ /**
+ * Tests {@link UnicodeUtils#unicodeStringToString(String)}: every char must come back as a backslash, 'u',
+ * and four lowercase hex digits, concatenated in input order. */
+ public void testUnicodeStringToString() {
+ assertTrue(UnicodeUtils.unicodeStringToString("\u0000").equals("\\u0000"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u0001").equals("\\u0001"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u000F").equals("\\u000f"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u001F").equals("\\u001f"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u00fF").equals("\\u00ff"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u01fF").equals("\\u01ff"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u0ffF").equals("\\u0fff"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\u1ffF").equals("\\u1fff"));
+ assertTrue(UnicodeUtils.unicodeStringToString("\ufffF").equals("\\uffff"));
+
+ assertTrue(UnicodeUtils.unicodeStringToString("\u0F00\u0091\uABCD\u0FFF\u0Ff1\uFFFF\u0000").equals("\\u0f00\\u0091\\uabcd\\u0fff\\u0ff1\\uffff\\u0000")); // several codepoints in one string
+ }
+
+ /**
+ * Tests {@link UnicodeUtils#unicodeCodepointToString(char)}: a char must come back as a backslash, 'u',
+ * and four lowercase hex digits, zero-padded on the left. */
+ public void testUnicodeCodepointToString() {
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u0000').equals("\\u0000"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u0001').equals("\\u0001"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u000F').equals("\\u000f"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u001F').equals("\\u001f"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u00fF').equals("\\u00ff"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u01fF').equals("\\u01ff"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u0ffF').equals("\\u0fff"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\u1ffF').equals("\\u1fff"));
+ assertTrue(UnicodeUtils.unicodeCodepointToString('\ufffF').equals("\\uffff"));
+ }
+
+ /**
+ * Tests {@link UnicodeUtils#isEntirelyTibetanUnicode(String)} with one string made only of
+ * codepoints from U+0F00 through U+0FFF and one that also contains U+1000. */
+ public void testIsEntirelyTibetanUnicode() {
+ assertTrue(UnicodeUtils.isEntirelyTibetanUnicode("\u0F00\u0FFF\u0F00\u0F1e\u0F48")); // U+0F48 is reserved, but in the range.
+ assertTrue(!UnicodeUtils.isEntirelyTibetanUnicode("\u0F00\u1000\u0FFF\u0F00\u0F1e\u0F48")); // U+1000 is the one codepoint here outside U+0F00-U+0FFF.
+ }
+
+ /**
+ * Tests {@link UnicodeUtils#isTibetanConsonant(char)}: expects true for non-subjoined (e.g. U+0F40) and
+ * subjoined (e.g. U+0F90) consonants, and false for reserved codepoints and everything else tried. */
+ public void testIsTibetanConsonant() {
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u0000'));
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\uF000'));
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u0EFF'));
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u1000'));
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u0F00'));
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u0FFF'));
+
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0FB2'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0F6A'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0F40'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0F50'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0FBC'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0FB9'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0FB0'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0FAD'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0FA6'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0F90'));
+ assertTrue(UnicodeUtils.isTibetanConsonant('\u0F91'));
+
+ // reserved codepoints (one inside each of the two consonant ranges):
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u0F48'));
+ assertTrue(!UnicodeUtils.isTibetanConsonant('\u0F98'));
+ }
+
+ /**
+ * Tests {@link UnicodeUtils#isInTibetanRange(char)}: U+0F00, U+0FF0 and U+0FFF must be
+ * in range; U+0000, U+0100 and U+1000 must not be. */
+ public void testIsInTibetanRange() {
+ assertTrue(!UnicodeUtils.isInTibetanRange('\u0000'));
+ assertTrue(!UnicodeUtils.isInTibetanRange('\u0100'));
+ assertTrue(!UnicodeUtils.isInTibetanRange('\u1000'));
+ assertTrue(UnicodeUtils.isInTibetanRange('\u0F00'));
+ assertTrue(UnicodeUtils.isInTibetanRange('\u0FF0'));
+ assertTrue(UnicodeUtils.isInTibetanRange('\u0FFF'));
+ }
}