More and better tests; fixed some bugs in LegalTshegBar.
This commit is contained in:
parent
35a9869aac
commit
2b81020b0e
4 changed files with 317 additions and 68 deletions
|
@ -100,13 +100,11 @@ And also there are cases where they combine. For ex you can have
|
||||||
* </ul>
|
* </ul>
|
||||||
*
|
*
|
||||||
* <p>Note that this class uses only a subset of Unicode to represent
|
* <p>Note that this class uses only a subset of Unicode to represent
|
||||||
* consonants and vowels. In some situations, you should use {@link
|
* consonants and vowels. You should always use the nominal form of
|
||||||
* #EWSUB_wa_zur} to represent the consonant wa, while in others you
|
* a letter, e.g. {@link #EWC_wa}, not {@link #EWSUB_wa_zur}, to
|
||||||
* should use {@link #EWC_wa}, even though you mean to subscribe a
|
* represent letters. (What if you mean to subscribe a fixed-form
|
||||||
* fixed-form wa. Basically, stick to the codepoints for which
|
* wa? Well, that's not a legal tsheg-bar, so you don't mean to do
|
||||||
* enumerations exist in {@link
|
* that.)</p>
|
||||||
* org.thdl.tib.text.tshegbar.UnicodeConstants} and use your common
|
|
||||||
* sense.</p>
|
|
||||||
*
|
*
|
||||||
* <p>For a pretty good, concise summary of the rules this class
|
* <p>For a pretty good, concise summary of the rules this class
|
||||||
* knows about, see Joe B. Wilson's <i>Translating Buddhism from
|
* knows about, see Joe B. Wilson's <i>Translating Buddhism from
|
||||||
|
@ -142,8 +140,6 @@ public class LegalTshegBar
|
||||||
/** Do not use this constructor. */
|
/** Do not use this constructor. */
|
||||||
private LegalTshegBar() { super(); }
|
private LegalTshegBar() { super(); }
|
||||||
|
|
||||||
// DLC FIXME: do we want to accept EWC_ra or EWSUB_ra_btags for
|
|
||||||
// the root letter, even if there is no head letter? Etc.
|
|
||||||
/** Constructs a valid Tibetan syllable or throws an exception.
|
/** Constructs a valid Tibetan syllable or throws an exception.
|
||||||
* Use EW_ABSENT (or null in the case of <code>suffix</code>) for
|
* Use EW_ABSENT (or null in the case of <code>suffix</code>) for
|
||||||
* those parts of the syllable that are absent. The root letter
|
* those parts of the syllable that are absent. The root letter
|
||||||
|
@ -180,7 +176,7 @@ public class LegalTshegBar
|
||||||
// copying is slightly inefficient because it is unnecessary
|
// copying is slightly inefficient because it is unnecessary
|
||||||
// since Java strings are read-only, but translating this code
|
// since Java strings are read-only, but translating this code
|
||||||
// to C++ is easier this way.
|
// to C++ is easier this way.
|
||||||
this.suffix = new String(suffix);
|
this.suffix = (suffix == null) ? null : new String(suffix);
|
||||||
|
|
||||||
this.postsuffix = postsuffix;
|
this.postsuffix = postsuffix;
|
||||||
this.vowel = vowel;
|
this.vowel = vowel;
|
||||||
|
@ -198,7 +194,8 @@ public class LegalTshegBar
|
||||||
throws IllegalArgumentException
|
throws IllegalArgumentException
|
||||||
{
|
{
|
||||||
this(prefix, headLetter, rootLetter, subjoinedLetter,
|
this(prefix, headLetter, rootLetter, subjoinedLetter,
|
||||||
hasWaZur, hasAChung, new String(new char[] { suffix }),
|
hasWaZur, hasAChung,
|
||||||
|
(suffix == EW_ABSENT) ? null : new String(new char[] { suffix }),
|
||||||
postsuffix, vowel);
|
postsuffix, vowel);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -216,7 +213,10 @@ public class LegalTshegBar
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the non-EWSUB_wa_zur consonant subscribed to the root
|
/** Returns the non-EWSUB_wa_zur consonant subscribed to the root
|
||||||
* consonant, or EW_ABSENT if none is. If you want to know if there is a wa-zur, use {@link #hasWaZurSubjoinedToRootLetter()}*/
|
* consonant, or EW_ABSENT if none is. If you want to know if
|
||||||
|
* there is a wa-zur, use {@link
|
||||||
|
* #hasWaZurSubjoinedToRootLetter()}. This returns EWC_ra, not
|
||||||
|
* EWSUB_ra_btags, etc. */
|
||||||
public char getSubjoinedLetter() {
|
public char getSubjoinedLetter() {
|
||||||
return subjoinedLetter;
|
return subjoinedLetter;
|
||||||
}
|
}
|
||||||
|
@ -458,11 +458,11 @@ public class LegalTshegBar
|
||||||
if (EW_ABSENT == subjoinedLetter) {
|
if (EW_ABSENT == subjoinedLetter) {
|
||||||
return isConsonantThatTakesWaZur(rootLetter);
|
return isConsonantThatTakesWaZur(rootLetter);
|
||||||
}
|
}
|
||||||
if (EWSUB_ra_btags == subjoinedLetter) {
|
if (EWC_ra == subjoinedLetter) {
|
||||||
if (EWC_ga == rootLetter
|
if (EWC_ga == rootLetter
|
||||||
|| EWC_da == rootLetter)
|
|| EWC_da == rootLetter)
|
||||||
return true;
|
return true;
|
||||||
} else if (EWSUB_ya_btags == subjoinedLetter) {
|
} else if (EWC_ya == subjoinedLetter) {
|
||||||
if (EWC_pha == rootLetter)
|
if (EWC_pha == rootLetter)
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -599,6 +599,9 @@ public class LegalTshegBar
|
||||||
* this is {@link #getConnectiveCaseSuffix()}
|
* this is {@link #getConnectiveCaseSuffix()}
|
||||||
* @param postsuffix the optional postsuffix, which should be
|
* @param postsuffix the optional postsuffix, which should be
|
||||||
* EWC_sa or EWC_da
|
* EWC_sa or EWC_da
|
||||||
|
* @param errorBuffer if non-null, and if the return code is
|
||||||
|
* false, then the reason that this is not a legal tsheg-bar will
|
||||||
|
* be appended to errorBuffer.
|
||||||
* @param vowel the optional vowel */
|
* @param vowel the optional vowel */
|
||||||
public static boolean formsLegalTshegBar(char prefix,
|
public static boolean formsLegalTshegBar(char prefix,
|
||||||
char headLetter,
|
char headLetter,
|
||||||
|
@ -608,12 +611,14 @@ public class LegalTshegBar
|
||||||
boolean hasAChung,
|
boolean hasAChung,
|
||||||
String suffix,
|
String suffix,
|
||||||
char postsuffix,
|
char postsuffix,
|
||||||
char vowel)
|
char vowel,
|
||||||
|
StringBuffer errorBuffer)
|
||||||
{
|
{
|
||||||
try {
|
try {
|
||||||
return internalLegalityTest(prefix, headLetter, rootLetter,
|
return internalLegalityTest(prefix, headLetter, rootLetter,
|
||||||
subjoinedLetter, hasWaZur, hasAChung,
|
subjoinedLetter, hasWaZur, hasAChung,
|
||||||
suffix, postsuffix, vowel, false);
|
suffix, postsuffix, vowel, false,
|
||||||
|
errorBuffer);
|
||||||
} catch (IllegalArgumentException e) {
|
} catch (IllegalArgumentException e) {
|
||||||
throw new Error("This simply cannot happen, but it did.");
|
throw new Error("This simply cannot happen, but it did.");
|
||||||
}
|
}
|
||||||
|
@ -631,12 +636,15 @@ public class LegalTshegBar
|
||||||
boolean hasAChung,
|
boolean hasAChung,
|
||||||
char suffix,
|
char suffix,
|
||||||
char postsuffix,
|
char postsuffix,
|
||||||
char vowel)
|
char vowel,
|
||||||
|
StringBuffer errorBuffer)
|
||||||
{
|
{
|
||||||
return formsLegalTshegBar(prefix, headLetter, rootLetter,
|
return formsLegalTshegBar(prefix, headLetter, rootLetter,
|
||||||
subjoinedLetter, hasWaZur, hasAChung,
|
subjoinedLetter, hasWaZur, hasAChung,
|
||||||
new String(new char[] { suffix }),
|
((suffix == EW_ABSENT)
|
||||||
postsuffix, vowel);
|
? null
|
||||||
|
: new String(new char[] { suffix })),
|
||||||
|
postsuffix, vowel, errorBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -659,12 +667,17 @@ public class LegalTshegBar
|
||||||
{
|
{
|
||||||
internalLegalityTest(prefix, headLetter, rootLetter,
|
internalLegalityTest(prefix, headLetter, rootLetter,
|
||||||
subjoinedLetter, hasWaZur, hasAChung,
|
subjoinedLetter, hasWaZur, hasAChung,
|
||||||
suffix, postsuffix, vowel, true);
|
suffix, postsuffix, vowel, true, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Voodoo. Stand back. */
|
/** Voodoo. Stand back. */
|
||||||
private static boolean internalThrowThing(boolean doThrow, String msg)
|
private static boolean internalThrowThing(boolean doThrow,
|
||||||
|
StringBuffer errorBuf,
|
||||||
|
String msg)
|
||||||
{
|
{
|
||||||
|
if (errorBuf != null) {
|
||||||
|
errorBuf.append(msg);
|
||||||
|
}
|
||||||
if (doThrow)
|
if (doThrow)
|
||||||
throw new IllegalArgumentException(msg);
|
throw new IllegalArgumentException(msg);
|
||||||
return false;
|
return false;
|
||||||
|
@ -674,6 +687,8 @@ public class LegalTshegBar
|
||||||
* thrown, then this combination makes a legal Tibetan syllable.
|
* thrown, then this combination makes a legal Tibetan syllable.
|
||||||
* To learn about the arguments, see {@link
|
* To learn about the arguments, see {@link
|
||||||
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char)}.
|
* #formsLegalTshegBar(char,char,char,char,boolean,boolean,String,char,char,StringBuffer)}.
|
||||||
|
* @param errorBuf if non-null, the reason this is illegal will
|
||||||
|
* be written here, if this is illegal
|
||||||
* @return true if this syllable is legal, false if this syllable
|
* @return true if this syllable is legal, false if this syllable
|
||||||
* is illegal and throwIfIllegal is false, does not return if
|
* is illegal and throwIfIllegal is false, does not return if
|
||||||
* this syllable is illegal and throwIfIllegal is true
|
* this syllable is illegal and throwIfIllegal is true
|
||||||
|
@ -689,11 +704,13 @@ public class LegalTshegBar
|
||||||
String suffix,
|
String suffix,
|
||||||
char postsuffix,
|
char postsuffix,
|
||||||
char vowel,
|
char vowel,
|
||||||
boolean throwIfIllegal)
|
boolean throwIfIllegal,
|
||||||
|
StringBuffer errorBuf)
|
||||||
throws IllegalArgumentException
|
throws IllegalArgumentException
|
||||||
{
|
{
|
||||||
if (!isNominalRepresentationOfConsonant(rootLetter))
|
if (!isNominalRepresentationOfConsonant(rootLetter))
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\u0F6A)");
|
"The root letter must be one of the standard thirty Tibetan consonants, and must be represented nominally, not, for example, by FIXED-FORM RA (\u0F6A)");
|
||||||
|
|
||||||
if (EW_ABSENT != prefix) {
|
if (EW_ABSENT != prefix) {
|
||||||
|
@ -701,28 +718,34 @@ public class LegalTshegBar
|
||||||
// and that it can go with this root letter:
|
// and that it can go with this root letter:
|
||||||
if (!isNominalRepresentationOfPrefix(prefix))
|
if (!isNominalRepresentationOfPrefix(prefix))
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The prefix is not absent, so it must be one of the five possible prefixes.");
|
"The prefix is not absent, so it must be one of the five possible prefixes.");
|
||||||
// DLC test that it can go with the root letter.
|
// DLC test that it can go with the root letter.
|
||||||
}
|
}
|
||||||
|
|
||||||
if (EW_ABSENT != subjoinedLetter) {
|
if (EW_ABSENT != subjoinedLetter) {
|
||||||
if (EWSUB_ya_btags == subjoinedLetter) {
|
if (EWC_ya == subjoinedLetter) {
|
||||||
if (!isConsonantThatTakesYaBtags(rootLetter)) {
|
if (!isConsonantThatTakesYaBtags(rootLetter)) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"Cannot subscribe ya-btags to that root letter.");
|
"Cannot subscribe ya-btags to that root letter.");
|
||||||
}
|
}
|
||||||
} else if (EWSUB_ra_btags == subjoinedLetter) {
|
} else if (EWC_ra == subjoinedLetter) {
|
||||||
if (!isConsonantThatTakesRaBtags(rootLetter)) {
|
if (!isConsonantThatTakesRaBtags(rootLetter)) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"Cannot subscribe ra-btags to that root letter.");
|
"Cannot subscribe ra-btags to that root letter.");
|
||||||
}
|
}
|
||||||
} else if (EWSUB_la_btags == subjoinedLetter) {
|
} else if (EWC_la == subjoinedLetter) {
|
||||||
if (!isConsonantThatTakesLaBtags(rootLetter)) {
|
if (!isConsonantThatTakesLaBtags(rootLetter)) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"Cannot subscribe la-btags to that root letter.");
|
"Cannot subscribe la-btags to that root letter.");
|
||||||
}
|
}
|
||||||
} else if (EWSUB_wa_zur == subjoinedLetter) {
|
} else if (EWC_wa == subjoinedLetter) {
|
||||||
throw new Error("DLC FIXME: can this happen? wa-zur comes in via the boolean argument hasWaZur, not via subjoinedLetter.");
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
|
"The presence of wa-zur must be specified via a boolean parameter.");
|
||||||
} else {
|
} else {
|
||||||
// check for a common mistake:
|
// check for a common mistake:
|
||||||
if ('\u0FBA' == subjoinedLetter
|
if ('\u0FBA' == subjoinedLetter
|
||||||
|
@ -730,9 +753,11 @@ public class LegalTshegBar
|
||||||
|| '\u0FBC' == subjoinedLetter)
|
|| '\u0FBC' == subjoinedLetter)
|
||||||
{
|
{
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The subjoined letter given is subjoinable, but you gave the fixed-form variant, which is not used in Tibetan syllables but is sometimes used in Tibetan transliteration of Sanskrit, Chinese, or some non-Tibetan language.");
|
"The subjoined letter given is subjoinable, but you gave the fixed-form variant, which is not used in Tibetan syllables but is sometimes used in Tibetan transliteration of Sanskrit, Chinese, or some non-Tibetan language.");
|
||||||
}
|
}
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The subjoined letter given is not one of the four consonants that may be subscribed.");
|
"The subjoined letter given is not one of the four consonants that may be subscribed.");
|
||||||
}
|
}
|
||||||
} // subjoinedLetter tests
|
} // subjoinedLetter tests
|
||||||
|
@ -743,10 +768,12 @@ public class LegalTshegBar
|
||||||
if (!getConnectiveCaseSuffix().equals(suffix)) {
|
if (!getConnectiveCaseSuffix().equals(suffix)) {
|
||||||
if (suffix.length() != 1) {
|
if (suffix.length() != 1) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"Illegal suffix -- not one of the legal complex suffixes like 'u, 'o, 'i, 'am.");
|
"Illegal suffix -- not one of the legal complex suffixes like 'u, 'o, 'i, 'am.");
|
||||||
}
|
}
|
||||||
if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
|
if (!isNominalRepresentationOfSimpleSuffix(suffix.charAt(0))) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"Illegal suffix -- not one of the ten legal suffixes: "
|
"Illegal suffix -- not one of the ten legal suffixes: "
|
||||||
+ UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
|
+ UnicodeUtils.unicodeCodepointToString(suffix.charAt(0)));
|
||||||
}
|
}
|
||||||
|
@ -755,6 +782,7 @@ public class LegalTshegBar
|
||||||
if (EW_ABSENT != postsuffix) {
|
if (EW_ABSENT != postsuffix) {
|
||||||
if (null == suffix)
|
if (null == suffix)
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"You cannot have a postsuffix unless you also have a suffix.");
|
"You cannot have a postsuffix unless you also have a suffix.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -762,11 +790,13 @@ public class LegalTshegBar
|
||||||
if (EWC_ra == headLetter) {
|
if (EWC_ra == headLetter) {
|
||||||
if (!isConsonantThatTakesRaMgo(rootLetter)) {
|
if (!isConsonantThatTakesRaMgo(rootLetter)) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The head letter ra cannot be used with that root letter.");
|
"The head letter ra cannot be used with that root letter.");
|
||||||
}
|
}
|
||||||
} else if (EWC_la == headLetter) {
|
} else if (EWC_la == headLetter) {
|
||||||
if (!isConsonantThatTakesLaMgo(rootLetter)) {
|
if (!isConsonantThatTakesLaMgo(rootLetter)) {
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The head letter la cannot be used with that root letter.");
|
"The head letter la cannot be used with that root letter.");
|
||||||
}
|
}
|
||||||
} else if (EWC_sa == headLetter) {
|
} else if (EWC_sa == headLetter) {
|
||||||
|
@ -774,15 +804,18 @@ public class LegalTshegBar
|
||||||
// handle a common error specially:
|
// handle a common error specially:
|
||||||
if (EWC_la == rootLetter)
|
if (EWC_la == rootLetter)
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"sa cannot be a head letter atop the root letter la. You probably meant to have sa the root letter and la the subjoined letter.");
|
"sa cannot be a head letter atop the root letter la. You probably meant to have sa the root letter and la the subjoined letter.");
|
||||||
|
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The head letter sa cannot be used with that root letter.");
|
"The head letter sa cannot be used with that root letter.");
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// '\u0F6A' is not a valid head letter, even for
|
// '\u0F6A' is not a valid head letter, even for
|
||||||
// "rnya". Use EWC_ra instead.
|
// "rnya". Use EWC_ra instead.
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The head letter given is not valid.");
|
"The head letter given is not valid.");
|
||||||
}
|
}
|
||||||
} // headLetter tests
|
} // headLetter tests
|
||||||
|
@ -796,16 +829,20 @@ public class LegalTshegBar
|
||||||
{
|
{
|
||||||
if (EWC_achen == vowel)
|
if (EWC_achen == vowel)
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The vowel given is not valid. Use EW_ABSENT for the EWC_achen sound.");
|
"The vowel given is not valid. Use EW_ABSENT for the EWC_achen sound.");
|
||||||
if ('\u0F71' == vowel)
|
if ('\u0F71' == vowel)
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
"a-chung cannot be used in a simple Tibetan syllable.");
|
errorBuf,
|
||||||
|
"a-chung cannot be used in a simple Tibetan syllable."); // DLC FIXME: what about pA?
|
||||||
return internalThrowThing(throwIfIllegal,
|
return internalThrowThing(throwIfIllegal,
|
||||||
|
errorBuf,
|
||||||
"The vowel given is not valid.");
|
"The vowel given is not valid.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phew. We got here, so this combination of inputs is valid.
|
// Phew. We got here, so this combination of inputs is valid.
|
||||||
|
// Do nothing to errorBuf.
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -40,29 +40,123 @@ public class LegalTshegBarTest extends TestCase implements UnicodeConstants {
|
||||||
|
|
||||||
/** Tests the getThdlWylie() method and one of the constructors. */
|
/** Tests the getThdlWylie() method and one of the constructors. */
|
||||||
public void testGetThdlWylie() {
|
public void testGetThdlWylie() {
|
||||||
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWSUB_ra_btags,
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga, EWC_ra,
|
||||||
false, true, EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrAols"));
|
false, true, EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrAols"));
|
||||||
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
||||||
EWSUB_ra_btags, true, true,
|
EWC_ra, true, true,
|
||||||
EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrwAols"));
|
EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrwAols"));
|
||||||
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
||||||
EWSUB_ra_btags, false, false,
|
EWC_ra, false, false,
|
||||||
EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrols"));
|
EWC_la, EWC_sa, EWV_o).getThdlWylie().toString().equals("bsgrols"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EW_ABSENT, EWC_ta,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
EWC_nga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("btang"));
|
||||||
|
|
||||||
|
// dga and dag are fun, as both are represented by "\u0F51\u0F42":
|
||||||
|
{
|
||||||
|
assertTrue(new LegalTshegBar(EWC_da, EW_ABSENT, EWC_ga,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
EW_ABSENT, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("dga"));
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_da,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("dag"));
|
||||||
|
}
|
||||||
|
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EWC_ra, EWC_da,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("rdag"));
|
||||||
|
assertTrue(new LegalTshegBar(EWC_ba, EWC_ra, EWC_da,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
EWC_ga, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("brdag"));
|
||||||
|
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_nga,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
"\u0F60\u0F72", EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("nga'i"));
|
||||||
|
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_nga,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("nga"));
|
||||||
|
|
||||||
|
assertTrue(new LegalTshegBar(EW_ABSENT, EW_ABSENT, EWC_sa,
|
||||||
|
EWC_la, false, false,
|
||||||
|
null, EW_ABSENT, EW_ABSENT).getThdlWylie().toString().equals("sla"));
|
||||||
|
|
||||||
|
{
|
||||||
|
boolean threw = false;
|
||||||
|
try {
|
||||||
|
new LegalTshegBar(EW_ABSENT, EWC_sa, EWC_la,
|
||||||
|
EW_ABSENT, false, false,
|
||||||
|
null, EW_ABSENT, EW_ABSENT);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
threw = true;
|
||||||
|
}
|
||||||
|
assertTrue(threw);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests the formsLegalTshegBar(..) method. DLC FIXME: but
|
/** Tests the formsLegalTshegBar(..) method. DLC FIXME: but
|
||||||
* doesn't test it very well. */
|
* doesn't test it very well. */
|
||||||
public void testFormsLegalTshegBar() {
|
public void testFormsLegalTshegBar() {
|
||||||
|
StringBuffer eb = new StringBuffer();
|
||||||
|
|
||||||
// Ensure that EWTS's jskad is not legal:
|
// Ensure that EWTS's jskad is not legal:
|
||||||
assertTrue(!LegalTshegBar.formsLegalTshegBar(EWC_ja, EWC_sa,
|
{
|
||||||
EWC_ka, EW_ABSENT,
|
assertTrue(!LegalTshegBar.formsLegalTshegBar(EWC_ja, EWC_sa,
|
||||||
false, false,
|
EWC_ka, EW_ABSENT,
|
||||||
EW_ABSENT, EWC_da,
|
false, false,
|
||||||
EW_ABSENT));
|
EW_ABSENT, EWC_da,
|
||||||
|
EW_ABSENT, eb));
|
||||||
|
}
|
||||||
|
|
||||||
assertTrue(LegalTshegBar.formsLegalTshegBar(EWC_ba, EW_ABSENT,
|
assertTrue(LegalTshegBar.formsLegalTshegBar(EWC_ba, EW_ABSENT,
|
||||||
EWC_ta, EW_ABSENT,
|
EWC_ta, EW_ABSENT,
|
||||||
false, false,
|
false, false,
|
||||||
EWC_da, EW_ABSENT,
|
EWC_da, EW_ABSENT,
|
||||||
EW_ABSENT));
|
EW_ABSENT, eb));
|
||||||
|
|
||||||
|
// test that there's only one way to make dwa:
|
||||||
|
assertTrue(!LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
|
||||||
|
EWC_da, EWSUB_wa_zur,
|
||||||
|
false, false,
|
||||||
|
EW_ABSENT, EW_ABSENT,
|
||||||
|
EW_ABSENT, eb));
|
||||||
|
assertTrue(!LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
|
||||||
|
EWC_da, EWC_wa,
|
||||||
|
false, false,
|
||||||
|
EW_ABSENT, EW_ABSENT,
|
||||||
|
EW_ABSENT, eb));
|
||||||
|
boolean result
|
||||||
|
= LegalTshegBar.formsLegalTshegBar(EW_ABSENT, EW_ABSENT,
|
||||||
|
EWC_da, EW_ABSENT,
|
||||||
|
true, false,
|
||||||
|
EW_ABSENT, EW_ABSENT,
|
||||||
|
EW_ABSENT, eb);
|
||||||
|
assertTrue(eb.toString(), result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Tests the behavior of the constructors. */
|
||||||
|
public void testConstructors() {
|
||||||
|
boolean x;
|
||||||
|
|
||||||
|
x = false;
|
||||||
|
try {
|
||||||
|
new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
||||||
|
EWSUB_ra_btags, false, false,
|
||||||
|
EWC_la, EWC_sa, EWV_o);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
x = true;
|
||||||
|
}
|
||||||
|
assertTrue(x);
|
||||||
|
|
||||||
|
x = false;
|
||||||
|
try {
|
||||||
|
new LegalTshegBar(EWC_ba, EWC_sa, EWC_ga,
|
||||||
|
EWSUB_ra_btags, false, false,
|
||||||
|
new String(new char[] { EWC_la }), EWC_sa,
|
||||||
|
EWV_o);
|
||||||
|
} catch (IllegalArgumentException e) {
|
||||||
|
x = true;
|
||||||
|
}
|
||||||
|
assertTrue(x);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -30,11 +30,11 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
/** Returns true iff x is a Unicode codepoint that represents a
|
/** Returns true iff x is a Unicode codepoint that represents a
|
||||||
consonant or two-consonant stack that has a Unicode code
|
consonant or two-consonant stack that has a Unicode code
|
||||||
point. Returns true only for the usual suspects (like
|
point. Returns true only for the usual suspects (like
|
||||||
<code>\u0F40</code>) and for Sanskrit consonants (like
|
<code>U+0F40</code>) and for Sanskrit consonants (like
|
||||||
<code>\u0F71</code>) and the simple two-consonant stacks in
|
<code>U+0F4C</code>) and the simple two-consonant stacks in
|
||||||
Unicode (like <code>\u0F43</code>). Returns false for, among
|
Unicode (like <code>U+0F43</code>). Returns false for, among
|
||||||
other things, subjoined consonants like
|
other things, subjoined consonants like
|
||||||
<code>\u0F90</code>. */
|
<code>U+0F90</code>. */
|
||||||
public static boolean isNonSubjoinedConsonant(char x) {
|
public static boolean isNonSubjoinedConsonant(char x) {
|
||||||
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
|
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
|
||||||
&& (x >= '\u0F40' && x <= '\u0F6A'));
|
&& (x >= '\u0F40' && x <= '\u0F6A'));
|
||||||
|
@ -43,11 +43,11 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
/** Returns true iff x is a Unicode codepoint that represents a
|
/** Returns true iff x is a Unicode codepoint that represents a
|
||||||
subjoined consonant or subjoined two-consonant stack that has
|
subjoined consonant or subjoined two-consonant stack that has
|
||||||
a Unicode code point. Returns true only for the usual
|
a Unicode code point. Returns true only for the usual
|
||||||
suspects (like <code>\u0F90</code>) and for Sanskrit
|
suspects (like <code>U+0F90</code>) and for Sanskrit
|
||||||
consonants (like <code>\u0F9C</code>) and the simple
|
consonants (like <code>U+0F9C</code>) and the simple
|
||||||
two-consonant stacks in Unicode (like <code>\u0FAC</code>).
|
two-consonant stacks in Unicode (like <code>U+0FAC</code>).
|
||||||
Returns false for, among other things, non-subjoined
|
Returns false for, among other things, non-subjoined
|
||||||
consonants like <code>\u0F40</code>. */
|
consonants like <code>U+0F40</code>. */
|
||||||
public static boolean isSubjoinedConsonant(char x) {
|
public static boolean isSubjoinedConsonant(char x) {
|
||||||
return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */)
|
return ((x != '\u0F98' /* reserved in Unicode 3.2, but not in use */)
|
||||||
&& (x >= '\u0F90' && x <= '\u0FBC'));
|
&& (x >= '\u0F90' && x <= '\u0FBC'));
|
||||||
|
@ -56,13 +56,13 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
/** Returns true iff x is the preferred representation of a
|
/** Returns true iff x is the preferred representation of a
|
||||||
Tibetan or Sanskrit consonant and cannot be broken down any
|
Tibetan or Sanskrit consonant and cannot be broken down any
|
||||||
further. Returns false for, among other things, subjoined
|
further. Returns false for, among other things, subjoined
|
||||||
consonants like <code>\u0F90</code>, two-component consonants
|
consonants like <code>U+0F90</code>, two-component consonants
|
||||||
like <code>\u0F43</code>, and fixed-form consonants like
|
like <code>U+0F43</code>, and fixed-form consonants like
|
||||||
'\u0F6A'. The new consonants (for transcribing Chinese, I
|
<code>U+0F6A</code>. The new consonants (for transcribing
|
||||||
believe) "\u0F55\u0F39" (which EWTS calls "fa"),
|
Chinese, I believe) "\u0F55\u0F39" (which EWTS calls
|
||||||
"\u0F56\u0F39" ("va"), and "\u0F5F\u0F39" ("Dza") are
|
"fa"), "\u0F56\u0F39" ("va"), and
|
||||||
two-codepoint sequences, but you should be aware of them
|
"\u0F5F\u0F39" ("Dza") are two-codepoint sequences,
|
||||||
also. */
|
but you should be aware of them also. */
|
||||||
public static boolean isPreferredFormOfConsonant(char x) {
|
public static boolean isPreferredFormOfConsonant(char x) {
|
||||||
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
|
return ((x != '\u0F48' /* reserved in Unicode 3.2, but not in use */)
|
||||||
&& (x >= '\u0F40' && x <= '\u0F68')
|
&& (x >= '\u0F40' && x <= '\u0F68')
|
||||||
|
@ -97,7 +97,7 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
Unicode codepoints, into either Normalization Form KD (NFKD),
|
Unicode codepoints, into either Normalization Form KD (NFKD),
|
||||||
D (NFD), or THDL (NFTHDL), depending on the value of normForm.
|
D (NFD), or THDL (NFTHDL), depending on the value of normForm.
|
||||||
NFD and NFKD are specified by Unicode 3.2; NFTHDL is needed
|
NFD and NFKD are specified by Unicode 3.2; NFTHDL is needed
|
||||||
for {@link org.thdl.tib.text.tshegbar#UnicodeGraphemeCluster}
|
for {@link org.thdl.tib.text.tshegbar.UnicodeGraphemeCluster}
|
||||||
because NFKD normalizes <code>U+0F0C</code> and neither NFD
|
because NFKD normalizes <code>U+0F0C</code> and neither NFD
|
||||||
nor NFKD breaks down <code>U+0F00</code> into its constituent
|
nor NFKD breaks down <code>U+0F00</code> into its constituent
|
||||||
codepoints. NFTHDL uses a maximum of codepoints, and it never
|
codepoints. NFTHDL uses a maximum of codepoints, and it never
|
||||||
|
@ -247,7 +247,7 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
|
|
||||||
/** Returns true iff ch corresponds to the Tibetan letter wa.
|
/** Returns true iff ch corresponds to the Tibetan letter wa.
|
||||||
Several Unicode codepoints correspond to the Tibetan letter
|
Several Unicode codepoints correspond to the Tibetan letter
|
||||||
wa. Oftentimes, <code>\u0F5D</code> is thought of as the
|
wa. Oftentimes, <code>U+0F5D</code> is thought of as the
|
||||||
nominal representation. */
|
nominal representation. */
|
||||||
public static boolean isWa(char ch) {
|
public static boolean isWa(char ch) {
|
||||||
return ('\u0F5D' == ch
|
return ('\u0F5D' == ch
|
||||||
|
@ -257,7 +257,7 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
|
|
||||||
/** Returns true iff ch corresponds to the Tibetan letter ya.
|
/** Returns true iff ch corresponds to the Tibetan letter ya.
|
||||||
Several Unicode codepoints correspond to the Tibetan letter
|
Several Unicode codepoints correspond to the Tibetan letter
|
||||||
ya. Oftentimes, <code>\u0F61</code> is thought of as the
|
ya. Oftentimes, <code>U+0F61</code> is thought of as the
|
||||||
nominal representation. */
|
nominal representation. */
|
||||||
public static boolean isYa(char ch) {
|
public static boolean isYa(char ch) {
|
||||||
return ('\u0F61' == ch
|
return ('\u0F61' == ch
|
||||||
|
@ -267,7 +267,7 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
|
|
||||||
/** Returns true iff there exists at least one codepoint cp in
|
/** Returns true iff there exists at least one codepoint cp in
|
||||||
unicodeString such that cp {@link #isRa(char) is ra} or contains
|
unicodeString such that cp {@link #isRa(char) is ra} or contains
|
||||||
ra (like <code>\u0F77</code>). This method is not implemented
|
ra (like <code>U+0F77</code>). This method is not implemented
|
||||||
as fast as it could be. It calls on the canonicalization code
|
as fast as it could be. It calls on the canonicalization code
|
||||||
in order to maximize reuse and minimize the possibility of
|
in order to maximize reuse and minimize the possibility of
|
||||||
coder error. */
|
coder error. */
|
||||||
|
@ -298,6 +298,9 @@ public class UnicodeUtils implements UnicodeConstants {
|
||||||
return "\\u" + Integer.toHexString((int)cp);
|
return "\\u" + Integer.toHexString((int)cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a human-readable, ASCII form of the String s of Unicode
|
||||||
|
* codepoints. */
|
||||||
public static String unicodeStringToString(String s) {
|
public static String unicodeStringToString(String s) {
|
||||||
StringBuffer sb = new StringBuffer(s.length() * 6);
|
StringBuffer sb = new StringBuffer(s.length() * 6);
|
||||||
for (int i = 0; i < s.length(); i++) {
|
for (int i = 0; i < s.length(); i++) {
|
||||||
|
|
|
@ -40,10 +40,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests Unicode Normalization form KD for Tibetan codepoints.
|
/** Tests Unicode Normalization form KD for Tibetan codepoints.
|
||||||
See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
|
* See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
|
||||||
contains all test cases for
|
* contains all test cases for
|
||||||
<code>U+0F00</code>-<code>U+0FFF</code> there, and a few
|
* <code>U+0F00</code>-<code>U+0FFF</code> there, and a few more.
|
||||||
more. */
|
* Tests both {@link
|
||||||
|
* UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
|
||||||
|
* {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
|
||||||
|
* byte)}. */
|
||||||
public void testMostlyNFKD() {
|
public void testMostlyNFKD() {
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFKD).equals("\u0F0B"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFKD).equals("\u0F0B"));
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFKD).equals("\u0F40"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFKD).equals("\u0F40"));
|
||||||
|
@ -112,10 +115,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests Unicode Normalization form D for Tibetan codepoints.
|
/** Tests Unicode Normalization form D for Tibetan codepoints.
|
||||||
See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
|
* See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
|
||||||
contains all test cases for
|
* contains all test cases for
|
||||||
<code>U+0F00</code>-<code>U+0FFF</code> there, and a few
|
* <code>U+0F00</code>-<code>U+0FFF</code> there, and a few more.
|
||||||
more. */
|
* Tests both {@link
|
||||||
|
* UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
|
||||||
|
* {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
|
||||||
|
* byte)}. */
|
||||||
public void testMostlyNFD() {
|
public void testMostlyNFD() {
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFD).equals("\u0F0B"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFD).equals("\u0F0B"));
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFD).equals("\u0F40"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFD).equals("\u0F40"));
|
||||||
|
@ -184,10 +190,13 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests Unicode Normalization form THDL for Tibetan codepoints.
|
/** Tests Unicode Normalization form THDL for Tibetan codepoints.
|
||||||
See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
|
* See Unicode, Inc.'s NormalizationTest-3.2.0.txt. This
|
||||||
contains all test cases for
|
* contains all test cases for
|
||||||
<code>U+0F00</code>-<code>U+0FFF</code> there, and a few
|
* <code>U+0F00</code>-<code>U+0FFF</code> there, and a few more.
|
||||||
more. */
|
* Tests both {@link
|
||||||
|
* UnicodeUtils#toMostlyDecomposedUnicode(String, byte)} and
|
||||||
|
* {@link UnicodeUtils#toMostlyDecomposedUnicode(StringBuffer,
|
||||||
|
* byte)}. */
|
||||||
public void testMostlyNFTHDL() {
|
public void testMostlyNFTHDL() {
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFTHDL).equals("\u0F0B"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F0B", NORM_NFTHDL).equals("\u0F0B"));
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFTHDL).equals("\u0F40"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F40", NORM_NFTHDL).equals("\u0F40"));
|
||||||
|
@ -253,10 +262,36 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F79", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0F79", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F81", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F81", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
|
||||||
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F71\u0F80", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("\u0FB3\u0F71\u0F80", NORM_NFTHDL).equals("\u0FB3\u0F71\u0F80"));
|
||||||
|
|
||||||
|
|
||||||
|
assertTrue(UnicodeUtils.toMostlyDecomposedUnicode("", NORM_NFTHDL).equals(""));
|
||||||
|
|
||||||
|
{
|
||||||
|
StringBuffer sb = new StringBuffer("\u0FAC");
|
||||||
|
UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
|
||||||
|
assertTrue(sb.toString().equals("\u0FAB\u0FB7"));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
StringBuffer sb = new StringBuffer("\u0F66");
|
||||||
|
UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
|
||||||
|
assertTrue(sb.toString().equals("\u0F66"));
|
||||||
|
}
|
||||||
|
{
|
||||||
|
StringBuffer sb = new StringBuffer("");
|
||||||
|
UnicodeUtils.toMostlyDecomposedUnicode(sb, NORM_NFTHDL);
|
||||||
|
assertTrue(sb.toString().equals(""));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Tests the containsRa method. */
|
/** Tests the containsRa method. */
|
||||||
public void testContainsRa() {
|
public void testContainsRa() {
|
||||||
|
assertTrue(!UnicodeUtils.containsRa('\u0F69'));
|
||||||
|
assertTrue(!UnicodeUtils.containsRa('\u0FB1'));
|
||||||
|
assertTrue(!UnicodeUtils.containsRa('\u0F48'));
|
||||||
|
assertTrue(!UnicodeUtils.containsRa('\u0060'));
|
||||||
|
assertTrue(!UnicodeUtils.containsRa('\uFFFF'));
|
||||||
|
assertTrue(!UnicodeUtils.containsRa('\uFFFF'));
|
||||||
|
|
||||||
assertTrue(UnicodeUtils.containsRa('\u0FB2'));
|
assertTrue(UnicodeUtils.containsRa('\u0FB2'));
|
||||||
assertTrue(UnicodeUtils.containsRa('\u0F77'));
|
assertTrue(UnicodeUtils.containsRa('\u0F77'));
|
||||||
assertTrue(UnicodeUtils.containsRa('\u0F76'));
|
assertTrue(UnicodeUtils.containsRa('\u0F76'));
|
||||||
|
@ -264,4 +299,84 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
|
||||||
assertTrue(UnicodeUtils.containsRa('\u0F62'));
|
assertTrue(UnicodeUtils.containsRa('\u0F62'));
|
||||||
assertTrue(UnicodeUtils.containsRa('\u0FBC'));
|
assertTrue(UnicodeUtils.containsRa('\u0FBC'));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the {@link UnicodeUtils#unicodeStringToString(String)}
|
||||||
|
* method. */
|
||||||
|
public void testUnicodeStringToString() {
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u0000").equals("\\u0000"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u0001").equals("\\u0001"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u000F").equals("\\u000f"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u001F").equals("\\u001f"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u00fF").equals("\\u00ff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u01fF").equals("\\u01ff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u0ffF").equals("\\u0fff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u1ffF").equals("\\u1fff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\ufffF").equals("\\uffff"));
|
||||||
|
|
||||||
|
assertTrue(UnicodeUtils.unicodeStringToString("\u0F00\u0091\uABCD\u0FFF\u0Ff1\uFFFF\u0000").equals("\\u0f00\\u0091\\uabcd\\u0fff\\u0ff1\\uffff\\u0000"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the {@link UnicodeUtils#unicodeCodepointToString(char)}
|
||||||
|
* method. */
|
||||||
|
public void testUnicodeCodepointToString() {
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u0000').equals("\\u0000"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u0001').equals("\\u0001"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u000F').equals("\\u000f"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u001F').equals("\\u001f"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u00fF').equals("\\u00ff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u01fF').equals("\\u01ff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u0ffF').equals("\\u0fff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\u1ffF').equals("\\u1fff"));
|
||||||
|
assertTrue(UnicodeUtils.unicodeCodepointToString('\ufffF').equals("\\uffff"));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the {@link UnicodeUtils#isEntirelyTibetanUnicode(String)}
|
||||||
|
* method. */
|
||||||
|
public void testIsEntirelyTibetanUnicode() {
|
||||||
|
assertTrue(UnicodeUtils.isEntirelyTibetanUnicode("\u0F00\u0FFF\u0F00\u0F1e\u0F48")); // U+0F48 is reserved, but in the range.
|
||||||
|
assertTrue(!UnicodeUtils.isEntirelyTibetanUnicode("\u0F00\u1000\u0FFF\u0F00\u0F1e\u0F48")); // U+1000 is outside the Tibetan range (U+0F48 is reserved, but in the range).
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the {@link UnicodeUtils#isTibetanConsonant(char)}
|
||||||
|
* method. */
|
||||||
|
public void testIsTibetanConsonant() {
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u0000'));
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\uF000'));
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u0EFF'));
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u1000'));
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u0F00'));
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u0FFF'));
|
||||||
|
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0FB2'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0F6A'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0F40'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0F50'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0FBC'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0FB9'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0FB0'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0FAD'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0FA6'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0F90'));
|
||||||
|
assertTrue(UnicodeUtils.isTibetanConsonant('\u0F91'));
|
||||||
|
|
||||||
|
// reserved codepoints:
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u0F48'));
|
||||||
|
assertTrue(!UnicodeUtils.isTibetanConsonant('\u0F98'));
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tests the {@link UnicodeUtils#isInTibetanRange(char)}
|
||||||
|
* method. */
|
||||||
|
public void testIsInTibetanRange() {
|
||||||
|
assertTrue(!UnicodeUtils.isInTibetanRange('\u0000'));
|
||||||
|
assertTrue(!UnicodeUtils.isInTibetanRange('\u0100'));
|
||||||
|
assertTrue(!UnicodeUtils.isInTibetanRange('\u1000'));
|
||||||
|
assertTrue(UnicodeUtils.isInTibetanRange('\u0F00'));
|
||||||
|
assertTrue(UnicodeUtils.isInTibetanRange('\u0FF0'));
|
||||||
|
assertTrue(UnicodeUtils.isInTibetanRange('\u0FFF'));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue