The *->Unicode conversions were outputting Unicode that was not
well-formed.  They still do in some cases, but less often than before.

Chris Fynn wrote this a while back:

   By normal Tibetan & Dzongkha spelling, writing, and input rules
   Tibetan script stacks should be entered and written: 1 headline
   consonant (0F40->0F6A), any subjoined consonant(s) (0F90-> 0F9C),
   achung (0F71), shabkyu (0F74), any above headline vowel(s) (0F72
   0F7A 0F7B 0F7C 0F7D and 0F80); any ngaro (0F7E, 0F82 and 0F83).

Now efforts are made to ensure that the converters conform to the
above rules.
This commit is contained in:
dchandler 2004-12-13 02:32:46 +00:00
parent 3115f22484
commit aa5d86a6e3
3 changed files with 138 additions and 10 deletions

View file

@ -379,4 +379,32 @@ public class UnicodeUtilsTest extends TestCase implements UnicodeConstants {
assertTrue(UnicodeUtils.isInTibetanRange('\u0FF0'));
assertTrue(UnicodeUtils.isInTibetanRange('\u0FFF'));
}
/**
 * Tests the {@link UnicodeUtils#fixSomeOrderingErrorsInTibetanUnicode(StringBuffer)}
 * method.  Input whose combining marks are out of order must be
 * rewritten in place, with the method returning true to report the
 * change; input that is already in order must be left untouched, with
 * the method returning false. */
public void testFixSomeOrderingErrorsInTibetanUnicode() {
    // Each row is { input, expected contents of the buffer afterwards }.
    // U+0F71 (achung) must precede U+0F74 (shabkyu) and the
    // above-headline vowels (U+0F72, U+0F7A), and all of those must
    // precede U+0F7E (ngaro).
    String[][] misordered = {
        { "\u0f67\u0f72\u0f71", "\u0f67\u0f71\u0f72" },
        { "\u0f7a\u0f72\u0f71", "\u0f71\u0f7a\u0f72" },
        { "\u0f67\u0f7e\u0f71", "\u0f67\u0f71\u0f7e" },
        { "\u0f67\u0f74\u0f71", "\u0f67\u0f71\u0f74" },
        { "\u0f67\u0f7e\u0f72", "\u0f67\u0f72\u0f7e" },
        { "\u0f67\u0f7e\u0f74", "\u0f67\u0f74\u0f7e" },
    };
    for (int i = 0; i < misordered.length; i++) {
        String input = misordered[i][0];
        String expected = misordered[i][1];
        StringBuffer sb = new StringBuffer(input);
        boolean changed = UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(sb);
        // Naming the row in the message lets a failure be traced to
        // the table entry that caused it.
        assertTrue("misordered case " + i + " should be reported as changed",
                   changed);
        assertEquals("misordered case " + i + " should be reordered",
                     expected, sb.toString());
    }

    // Already well-ordered input, e.g. "\u0f67\u0f71\u0f72", must stay
    // the same.
    String[] wellOrdered = { "\u0f67\u0f71\u0f72" };
    for (int i = 0; i < wellOrdered.length; i++) {
        StringBuffer sb = new StringBuffer(wellOrdered[i]);
        boolean changed = UnicodeUtils.fixSomeOrderingErrorsInTibetanUnicode(sb);
        assertFalse("well-ordered case " + i + " should be reported as unchanged",
                    changed);
        assertEquals("well-ordered case " + i + " should not be modified",
                     wellOrdered[i], sb.toString());
    }
}
}